mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 09:59:41 +00:00
merged leejet changes
This commit is contained in:
parent
b6edb79648
commit
a1175cf34f
5 changed files with 296 additions and 176 deletions
|
@ -672,51 +672,81 @@ namespace Flux {
|
|||
}
|
||||
|
||||
// Generate IDs for image patches and text
|
||||
std::vector<std::vector<float>> gen_ids(int h, int w, int patch_size, int index = 0) {
|
||||
std::vector<std::vector<float>> gen_txt_ids(int bs, int context_len) {
|
||||
return std::vector<std::vector<float>>(bs * context_len, std::vector<float>(3, 0.0));
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> gen_img_ids(int h, int w, int patch_size, int bs, int index = 0, int h_offset = 0, int w_offset = 0) {
|
||||
int h_len = (h + (patch_size / 2)) / patch_size;
|
||||
int w_len = (w + (patch_size / 2)) / patch_size;
|
||||
|
||||
std::vector<std::vector<float>> img_ids(h_len * w_len, std::vector<float>(3, (float)index));
|
||||
std::vector<std::vector<float>> img_ids(h_len * w_len, std::vector<float>(3, 0.0));
|
||||
|
||||
std::vector<float> row_ids = linspace(0, h_len - 1, h_len);
|
||||
std::vector<float> col_ids = linspace(0, w_len - 1, w_len);
|
||||
std::vector<float> row_ids = linspace(h_offset, h_len - 1 + h_offset, h_len);
|
||||
std::vector<float> col_ids = linspace(w_offset, w_len - 1 + w_offset, w_len);
|
||||
|
||||
for (int i = 0; i < h_len; ++i) {
|
||||
for (int j = 0; j < w_len; ++j) {
|
||||
img_ids[i * w_len + j][0] = index;
|
||||
img_ids[i * w_len + j][1] = row_ids[i];
|
||||
img_ids[i * w_len + j][2] = col_ids[j];
|
||||
}
|
||||
}
|
||||
|
||||
return img_ids;
|
||||
std::vector<std::vector<float>> img_ids_repeated(bs * img_ids.size(), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < img_ids.size(); ++j) {
|
||||
img_ids_repeated[i * img_ids.size() + j] = img_ids[j];
|
||||
}
|
||||
}
|
||||
return img_ids_repeated;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> concat_ids(const std::vector<std::vector<float>>& a,
|
||||
const std::vector<std::vector<float>>& b,
|
||||
int bs) {
|
||||
size_t a_len = a.size() / bs;
|
||||
size_t b_len = b.size() / bs;
|
||||
std::vector<std::vector<float>> ids(a.size() + b.size(), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < a_len; ++j) {
|
||||
ids[i * (a_len + b_len) + j] = a[i * a_len + j];
|
||||
}
|
||||
for (int j = 0; j < b_len; ++j) {
|
||||
ids[i * (a_len + b_len) + a_len + j] = b[i * b_len + j];
|
||||
}
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> gen_ids(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents) {
|
||||
auto txt_ids = gen_txt_ids(bs, context_len);
|
||||
auto img_ids = gen_img_ids(h, w, patch_size, bs);
|
||||
|
||||
auto ids = concat_ids(txt_ids, img_ids, bs);
|
||||
uint64_t curr_h_offset = 0;
|
||||
uint64_t curr_w_offset = 0;
|
||||
for (ggml_tensor* ref : ref_latents) {
|
||||
uint64_t h_offset = 0;
|
||||
uint64_t w_offset = 0;
|
||||
if (ref->ne[1] + curr_h_offset > ref->ne[0] + curr_w_offset) {
|
||||
w_offset = curr_w_offset;
|
||||
} else {
|
||||
h_offset = curr_h_offset;
|
||||
}
|
||||
|
||||
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
|
||||
ids = concat_ids(ids, ref_ids, bs);
|
||||
|
||||
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
|
||||
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
// Generate positional embeddings
|
||||
std::vector<float> gen_pe(std::vector<struct ggml_tensor*> imgs, struct ggml_tensor* context, int patch_size, int theta, const std::vector<int>& axes_dim) {
|
||||
int context_len = context->ne[1];
|
||||
int bs = imgs[0]->ne[3];
|
||||
|
||||
std::vector<std::vector<float>> img_ids;
|
||||
for (int i = 0; i < imgs.size(); i++) {
|
||||
auto x = imgs[i];
|
||||
if (x) {
|
||||
int h = x->ne[1];
|
||||
int w = x->ne[0];
|
||||
std::vector<std::vector<float>> img_ids_i = gen_ids(h, w, patch_size, i);
|
||||
img_ids.insert(img_ids.end(), img_ids_i.begin(), img_ids_i.end());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> txt_ids(bs * context_len, std::vector<float>(3, 0.0));
|
||||
std::vector<std::vector<float>> ids(bs * (context_len + img_ids.size()), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < context_len; ++j) {
|
||||
ids[i * (context_len + img_ids.size()) + j] = txt_ids[j];
|
||||
}
|
||||
for (int j = 0; j < img_ids.size(); ++j) {
|
||||
ids[i * (context_len + img_ids.size()) + context_len + j] = img_ids[j];
|
||||
}
|
||||
}
|
||||
std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
|
||||
std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
|
||||
|
||||
std::vector<std::vector<float>> trans_ids = transpose(ids);
|
||||
size_t pos_len = ids.size();
|
||||
|
@ -843,7 +873,7 @@ namespace Flux {
|
|||
struct ggml_tensor* guidance,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* arange = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
std::vector<int> skip_layers = {}) {
|
||||
auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
|
||||
auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
|
||||
auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks["final_layer"]);
|
||||
|
@ -929,8 +959,23 @@ namespace Flux {
|
|||
return img;
|
||||
}
|
||||
|
||||
struct ggml_tensor* process_img(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x) {
|
||||
|
||||
int64_t W = x->ne[0];
|
||||
int64_t H = x->ne[1];
|
||||
int64_t patch_size = 2;
|
||||
int pad_h = (patch_size - H % patch_size) % patch_size;
|
||||
int pad_w = (patch_size - W % patch_size) % patch_size;
|
||||
x = ggml_pad(ctx, x, pad_w, pad_h, 0, 0); // [N, C, H + pad_h, W + pad_w]
|
||||
|
||||
// img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
|
||||
auto img = patchify(ctx, x, patch_size); // [N, h*w, C * patch_size * patch_size]
|
||||
return img;
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
std::vector<struct ggml_tensor*> imgs,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timestep,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* c_concat,
|
||||
|
@ -938,8 +983,8 @@ namespace Flux {
|
|||
struct ggml_tensor* guidance,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* arange = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>(),
|
||||
SDVersion version = VERSION_FLUX) {
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
std::vector<int> skip_layers = {}) {
|
||||
// Forward pass of DiT.
|
||||
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
|
||||
// timestep: (N,) tensor of diffusion timesteps
|
||||
|
@ -950,47 +995,41 @@ namespace Flux {
|
|||
// pe: (L, d_head/2, 2, 2)
|
||||
// return: (N, C, H, W)
|
||||
|
||||
auto x = imgs[0];
|
||||
GGML_ASSERT(x->ne[3] == 1);
|
||||
|
||||
int64_t W = x->ne[0];
|
||||
int64_t H = x->ne[1];
|
||||
int64_t C = x->ne[2];
|
||||
int64_t patch_size = 2;
|
||||
int pad_h = (patch_size - x->ne[0] % patch_size) % patch_size;
|
||||
int pad_w = (patch_size - x->ne[1] % patch_size) % patch_size;
|
||||
int pad_h = (patch_size - H % patch_size) % patch_size;
|
||||
int pad_w = (patch_size - W % patch_size) % patch_size;
|
||||
|
||||
// img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
|
||||
ggml_tensor* img = NULL; // [N, h*w, C * patch_size * patch_size]
|
||||
int64_t patchified_img_size;
|
||||
for (auto& x : imgs) {
|
||||
int pad_h = (patch_size - x->ne[0] % patch_size) % patch_size;
|
||||
int pad_w = (patch_size - x->ne[1] % patch_size) % patch_size;
|
||||
ggml_tensor* pad_x = ggml_pad(ctx, x, pad_w, pad_h, 0, 0);
|
||||
pad_x = patchify(ctx, pad_x, patch_size);
|
||||
if (img) {
|
||||
img = ggml_concat(ctx, img, pad_x, 1);
|
||||
} else {
|
||||
img = pad_x;
|
||||
patchified_img_size = img->ne[1];
|
||||
}
|
||||
}
|
||||
auto img = process_img(ctx, x);
|
||||
uint64_t img_tokens = img->ne[1];
|
||||
|
||||
if (c_concat != NULL) {
|
||||
ggml_tensor* masked = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], C, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], 0);
|
||||
ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
|
||||
|
||||
masked = ggml_pad(ctx, masked, pad_w, pad_h, 0, 0);
|
||||
mask = ggml_pad(ctx, mask, pad_w, pad_h, 0, 0);
|
||||
|
||||
masked = patchify(ctx, masked, patch_size);
|
||||
mask = patchify(ctx, mask, patch_size);
|
||||
masked = process_img(ctx, masked);
|
||||
mask = process_img(ctx, mask);
|
||||
|
||||
img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0);
|
||||
}
|
||||
|
||||
auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, arange, skip_layers); // [N, h*w, C * patch_size * patch_size]
|
||||
out = ggml_cont(ctx, ggml_view_2d(ctx, out, out->ne[0], patchified_img_size, out->nb[1], 0));
|
||||
if (ref_latents.size() > 0) {
|
||||
for (ggml_tensor* ref : ref_latents) {
|
||||
ref = process_img(ctx, ref);
|
||||
img = ggml_concat(ctx, img, ref, 1);
|
||||
}
|
||||
}
|
||||
|
||||
auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
|
||||
if (out->ne[1] > img_tokens) {
|
||||
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
|
||||
out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
|
||||
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
|
||||
}
|
||||
|
||||
// rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)
|
||||
out = unpatchify(ctx, out, (H + pad_h) / patch_size, (W + pad_w) / patch_size, patch_size); // [N, C, H + pad_h, W + pad_w]
|
||||
|
@ -1076,8 +1115,8 @@ namespace Flux {
|
|||
struct ggml_tensor* c_concat,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<struct ggml_tensor*> kontext_imgs = std::vector<struct ggml_tensor*>(),
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
GGML_ASSERT(x->ne[3] == 1);
|
||||
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
|
||||
|
||||
|
@ -1088,9 +1127,7 @@ namespace Flux {
|
|||
if (c_concat != NULL) {
|
||||
c_concat = to_backend(c_concat);
|
||||
}
|
||||
for (auto &img : kontext_imgs){
|
||||
img = to_backend(img);
|
||||
}
|
||||
|
||||
if (flux_params.is_chroma) {
|
||||
const char* SD_CHROMA_ENABLE_GUIDANCE = getenv("SD_CHROMA_ENABLE_GUIDANCE");
|
||||
bool disable_guidance = true;
|
||||
|
@ -1131,10 +1168,11 @@ namespace Flux {
|
|||
if (flux_params.guidance_embed || flux_params.is_chroma) {
|
||||
guidance = to_backend(guidance);
|
||||
}
|
||||
auto imgs = kontext_imgs;
|
||||
imgs.insert(imgs.begin(), x);
|
||||
for (int i = 0; i < ref_latents.size(); i++) {
|
||||
ref_latents[i] = to_backend(ref_latents[i]);
|
||||
}
|
||||
|
||||
pe_vec = flux.gen_pe(imgs, context, 2, flux_params.theta, flux_params.axes_dim);
|
||||
pe_vec = flux.gen_pe(x->ne[1], x->ne[0], 2, x->ne[3], context->ne[1], ref_latents, flux_params.theta, flux_params.axes_dim);
|
||||
int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
|
||||
// LOG_DEBUG("pos_len %d", pos_len);
|
||||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len);
|
||||
|
@ -1144,7 +1182,7 @@ namespace Flux {
|
|||
set_backend_tensor_data(pe, pe_vec.data());
|
||||
|
||||
struct ggml_tensor* out = flux.forward(compute_ctx,
|
||||
imgs,
|
||||
x,
|
||||
timesteps,
|
||||
context,
|
||||
c_concat,
|
||||
|
@ -1152,6 +1190,7 @@ namespace Flux {
|
|||
guidance,
|
||||
pe,
|
||||
precompute_arange,
|
||||
ref_latents,
|
||||
skip_layers);
|
||||
|
||||
ggml_build_forward_expand(gf, out);
|
||||
|
@ -1166,17 +1205,17 @@ namespace Flux {
|
|||
struct ggml_tensor* c_concat,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<struct ggml_tensor*> kontext_imgs = std::vector<struct ggml_tensor*>(),
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
// x: [N, in_channels, h, w]
|
||||
// timesteps: [N, ]
|
||||
// context: [N, max_position, hidden_size]
|
||||
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
||||
// guidance: [N, ]
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(x, timesteps, context, c_concat, y, guidance, kontext_imgs, skip_layers);
|
||||
return build_graph(x, timesteps, context, c_concat, y, guidance, ref_latents, skip_layers);
|
||||
};
|
||||
|
||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||
|
@ -1216,7 +1255,7 @@ namespace Flux {
|
|||
struct ggml_tensor* out = NULL;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, timesteps, context, NULL, y, guidance, std::vector<struct ggml_tensor*>(), &out, work_ctx);
|
||||
compute(8, x, timesteps, context, NULL, y, guidance, {}, &out, work_ctx);
|
||||
int t1 = ggml_time_ms();
|
||||
|
||||
print_ggml_tensor(out);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue