mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-10 04:00:53 +00:00
update stable-diffusion.cpp to master-306-2abe945 (#1732)
* update stable-diffusion.cpp to master-52a97b3 * update stable-diffusion.cpp to master-0ebe6fe * update stable-diffusion.cpp to master-301-fd693ac * update stable-diffusion.cpp to master-306-2abe945 * fix taesd file selection
This commit is contained in:
parent
3326bdc00a
commit
42087c3622
32 changed files with 769612 additions and 2673 deletions
217
otherarch/sdcpp/avi_writer.h
Normal file
217
otherarch/sdcpp/avi_writer.h
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
#ifndef __AVI_WRITER_H__
|
||||
#define __AVI_WRITER_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
#ifndef INCLUDE_STB_IMAGE_WRITE_H
|
||||
#include "stb_image_write.h"
|
||||
#endif
|
||||
|
||||
// One entry of the AVI 'idx1' index: locates a single '00dc' video
// frame chunk inside the file so players can seek without scanning.
typedef struct {
    uint32_t offset;  // file offset of the chunk's fourcc (as written by the encoder below)
    uint32_t size;    // payload size in bytes, excluding the 8-byte chunk header and any pad byte
} avi_index_entry;
|
||||
|
||||
// Write a 32-bit unsigned integer in little-endian byte order.
// Serializes byte-by-byte instead of fwrite(&val, ...) so the output is
// correct regardless of host endianness (RIFF/AVI is always little-endian).
void write_u32_le(FILE* f, uint32_t val) {
    uint8_t bytes[4] = {
        (uint8_t)(val & 0xFF),
        (uint8_t)((val >> 8) & 0xFF),
        (uint8_t)((val >> 16) & 0xFF),
        (uint8_t)((val >> 24) & 0xFF),
    };
    fwrite(bytes, 1, 4, f);
}
|
||||
|
||||
// Write a 16-bit unsigned integer in little-endian byte order.
// Serializes byte-by-byte instead of fwrite(&val, ...) so the output is
// correct regardless of host endianness (RIFF/AVI is always little-endian).
void write_u16_le(FILE* f, uint16_t val) {
    uint8_t bytes[2] = {
        (uint8_t)(val & 0xFF),
        (uint8_t)((val >> 8) & 0xFF),
    };
    fwrite(bytes, 1, 2, f);
}
|
||||
|
||||
/**
|
||||
* Create an MJPG AVI file from an array of sd_image_t images.
|
||||
* Images are encoded to JPEG using stb_image_write.
|
||||
*
|
||||
* @param filename Output AVI file name.
|
||||
* @param images Array of input images.
|
||||
* @param num_images Number of images in the array.
|
||||
* @param fps Frames per second for the video.
|
||||
* @param quality JPEG quality (0-100).
|
||||
* @return 0 on success, -1 on failure.
|
||||
*/
|
||||
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality = 90) {
|
||||
if (num_images == 0) {
|
||||
fprintf(stderr, "Error: Image array is empty.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
FILE* f = fopen(filename, "wb");
|
||||
if (!f) {
|
||||
perror("Error opening file for writing");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t width = images[0].width;
|
||||
uint32_t height = images[0].height;
|
||||
uint32_t channels = images[0].channel;
|
||||
if (channels != 3 && channels != 4) {
|
||||
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// --- RIFF AVI Header ---
|
||||
fwrite("RIFF", 4, 1, f);
|
||||
long riff_size_pos = ftell(f);
|
||||
write_u32_le(f, 0); // Placeholder for file size
|
||||
fwrite("AVI ", 4, 1, f);
|
||||
|
||||
// 'hdrl' LIST (header list)
|
||||
fwrite("LIST", 4, 1, f);
|
||||
write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
|
||||
fwrite("hdrl", 4, 1, f);
|
||||
|
||||
// 'avih' chunk (AVI main header)
|
||||
fwrite("avih", 4, 1, f);
|
||||
write_u32_le(f, 56);
|
||||
write_u32_le(f, 1000000 / fps); // Microseconds per frame
|
||||
write_u32_le(f, 0); // Max bytes per second
|
||||
write_u32_le(f, 0); // Padding granularity
|
||||
write_u32_le(f, 0x110); // Flags (HASINDEX | ISINTERLEAVED)
|
||||
write_u32_le(f, num_images); // Total frames
|
||||
write_u32_le(f, 0); // Initial frames
|
||||
write_u32_le(f, 1); // Number of streams
|
||||
write_u32_le(f, width * height * 3); // Suggested buffer size
|
||||
write_u32_le(f, width);
|
||||
write_u32_le(f, height);
|
||||
write_u32_le(f, 0); // Reserved
|
||||
write_u32_le(f, 0); // Reserved
|
||||
write_u32_le(f, 0); // Reserved
|
||||
write_u32_le(f, 0); // Reserved
|
||||
|
||||
// 'strl' LIST (stream list)
|
||||
fwrite("LIST", 4, 1, f);
|
||||
write_u32_le(f, 4 + 8 + 56 + 8 + 40);
|
||||
fwrite("strl", 4, 1, f);
|
||||
|
||||
// 'strh' chunk (stream header)
|
||||
fwrite("strh", 4, 1, f);
|
||||
write_u32_le(f, 56);
|
||||
fwrite("vids", 4, 1, f); // Stream type: video
|
||||
fwrite("MJPG", 4, 1, f); // Codec: Motion JPEG
|
||||
write_u32_le(f, 0); // Flags
|
||||
write_u16_le(f, 0); // Priority
|
||||
write_u16_le(f, 0); // Language
|
||||
write_u32_le(f, 0); // Initial frames
|
||||
write_u32_le(f, 1); // Scale
|
||||
write_u32_le(f, fps); // Rate
|
||||
write_u32_le(f, 0); // Start
|
||||
write_u32_le(f, num_images); // Length
|
||||
write_u32_le(f, width * height * 3); // Suggested buffer size
|
||||
write_u32_le(f, (uint32_t)-1); // Quality
|
||||
write_u32_le(f, 0); // Sample size
|
||||
write_u16_le(f, 0); // rcFrame.left
|
||||
write_u16_le(f, 0); // rcFrame.top
|
||||
write_u16_le(f, 0); // rcFrame.right
|
||||
write_u16_le(f, 0); // rcFrame.bottom
|
||||
|
||||
// 'strf' chunk (stream format: BITMAPINFOHEADER)
|
||||
fwrite("strf", 4, 1, f);
|
||||
write_u32_le(f, 40);
|
||||
write_u32_le(f, 40); // biSize
|
||||
write_u32_le(f, width);
|
||||
write_u32_le(f, height);
|
||||
write_u16_le(f, 1); // biPlanes
|
||||
write_u16_le(f, 24); // biBitCount
|
||||
fwrite("MJPG", 4, 1, f); // biCompression (FOURCC)
|
||||
write_u32_le(f, width * height * 3); // biSizeImage
|
||||
write_u32_le(f, 0); // XPelsPerMeter
|
||||
write_u32_le(f, 0); // YPelsPerMeter
|
||||
write_u32_le(f, 0); // Colors used
|
||||
write_u32_le(f, 0); // Colors important
|
||||
|
||||
// 'movi' LIST (video frames)
|
||||
long movi_list_pos = ftell(f);
|
||||
fwrite("LIST", 4, 1, f);
|
||||
long movi_size_pos = ftell(f);
|
||||
write_u32_le(f, 0); // Placeholder for movi size
|
||||
fwrite("movi", 4, 1, f);
|
||||
|
||||
avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images);
|
||||
if (!index) {
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Encode and write each frame as JPEG
|
||||
struct {
|
||||
uint8_t* buf;
|
||||
size_t size;
|
||||
} jpeg_data;
|
||||
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
jpeg_data.buf = NULL;
|
||||
jpeg_data.size = 0;
|
||||
|
||||
// Callback function to collect JPEG data into memory
|
||||
auto write_to_buf = [](void* context, void* data, int size) {
|
||||
auto jd = (decltype(jpeg_data)*)context;
|
||||
jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size);
|
||||
memcpy(jd->buf + jd->size, data, size);
|
||||
jd->size += size;
|
||||
};
|
||||
|
||||
// Encode to JPEG in memory
|
||||
stbi_write_jpg_to_func(
|
||||
write_to_buf,
|
||||
&jpeg_data,
|
||||
images[i].width,
|
||||
images[i].height,
|
||||
channels,
|
||||
images[i].data,
|
||||
quality);
|
||||
|
||||
// Write '00dc' chunk (video frame)
|
||||
fwrite("00dc", 4, 1, f);
|
||||
write_u32_le(f, jpeg_data.size);
|
||||
index[i].offset = ftell(f) - 8;
|
||||
index[i].size = jpeg_data.size;
|
||||
fwrite(jpeg_data.buf, 1, jpeg_data.size, f);
|
||||
|
||||
// Align to even byte size
|
||||
if (jpeg_data.size % 2)
|
||||
fputc(0, f);
|
||||
|
||||
free(jpeg_data.buf);
|
||||
}
|
||||
|
||||
// Finalize 'movi' size
|
||||
long cur_pos = ftell(f);
|
||||
long movi_size = cur_pos - movi_size_pos - 4;
|
||||
fseek(f, movi_size_pos, SEEK_SET);
|
||||
write_u32_le(f, movi_size);
|
||||
fseek(f, cur_pos, SEEK_SET);
|
||||
|
||||
// Write 'idx1' index
|
||||
fwrite("idx1", 4, 1, f);
|
||||
write_u32_le(f, num_images * 16);
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
fwrite("00dc", 4, 1, f);
|
||||
write_u32_le(f, 0x10);
|
||||
write_u32_le(f, index[i].offset);
|
||||
write_u32_le(f, index[i].size);
|
||||
}
|
||||
|
||||
// Finalize RIFF size
|
||||
cur_pos = ftell(f);
|
||||
long file_size = cur_pos - riff_size_pos - 4;
|
||||
fseek(f, riff_size_pos, SEEK_SET);
|
||||
write_u32_le(f, file_size);
|
||||
fseek(f, cur_pos, SEEK_SET);
|
||||
|
||||
fclose(f);
|
||||
free(index);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // __AVI_WRITER_H__
|
||||
|
|
@ -179,9 +179,9 @@ public:
|
|||
|
||||
auto it = encoder.find(utf8_to_utf32("img</w>"));
|
||||
if (it != encoder.end()) {
|
||||
LOG_DEBUG(" trigger word img already in vocab");
|
||||
LOG_DEBUG("trigger word img already in vocab");
|
||||
} else {
|
||||
LOG_DEBUG(" trigger word img not in vocab yet");
|
||||
LOG_DEBUG("trigger word img not in vocab yet");
|
||||
}
|
||||
|
||||
int rank = 0;
|
||||
|
|
@ -488,14 +488,14 @@ public:
|
|||
blocks["mlp"] = std::shared_ptr<GGMLBlock>(new CLIPMLP(d_model, intermediate_size));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, bool mask = true) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, ggml_backend_t backend, struct ggml_tensor* x, bool mask = true) {
|
||||
// x: [N, n_token, d_model]
|
||||
auto self_attn = std::dynamic_pointer_cast<MultiheadAttention>(blocks["self_attn"]);
|
||||
auto layer_norm1 = std::dynamic_pointer_cast<LayerNorm>(blocks["layer_norm1"]);
|
||||
auto layer_norm2 = std::dynamic_pointer_cast<LayerNorm>(blocks["layer_norm2"]);
|
||||
auto mlp = std::dynamic_pointer_cast<CLIPMLP>(blocks["mlp"]);
|
||||
|
||||
x = ggml_add(ctx, x, self_attn->forward(ctx, layer_norm1->forward(ctx, x), mask));
|
||||
x = ggml_add(ctx, x, self_attn->forward(ctx, backend, layer_norm1->forward(ctx, x), mask));
|
||||
x = ggml_add(ctx, x, mlp->forward(ctx, layer_norm2->forward(ctx, x)));
|
||||
return x;
|
||||
}
|
||||
|
|
@ -517,7 +517,11 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, int clip_skip = -1, bool mask = true) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
int clip_skip = -1,
|
||||
bool mask = true) {
|
||||
// x: [N, n_token, d_model]
|
||||
int layer_idx = n_layer - 1;
|
||||
// LOG_DEBUG("clip_skip %d", clip_skip);
|
||||
|
|
@ -532,7 +536,7 @@ public:
|
|||
}
|
||||
std::string name = "layers." + std::to_string(i);
|
||||
auto layer = std::dynamic_pointer_cast<CLIPLayer>(blocks[name]);
|
||||
x = layer->forward(ctx, x, mask); // [N, n_token, d_model]
|
||||
x = layer->forward(ctx, backend, x, mask); // [N, n_token, d_model]
|
||||
// LOG_DEBUG("layer %d", i);
|
||||
}
|
||||
return x;
|
||||
|
|
@ -544,17 +548,18 @@ protected:
|
|||
int64_t embed_dim;
|
||||
int64_t vocab_size;
|
||||
int64_t num_positions;
|
||||
bool force_clip_f32;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
enum ggml_type token_wtype = GGML_TYPE_F32;
|
||||
#if 1
|
||||
// kcpp reduce memory usage (reverts https://github.com/leejet/stable-diffusion.cpp/pull/601)
|
||||
auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
|
||||
if (tensor_type != tensor_types.end())
|
||||
token_wtype = tensor_type->second;
|
||||
#endif
|
||||
enum ggml_type position_wtype = GGML_TYPE_F32;
|
||||
|
||||
enum ggml_type token_wtype = GGML_TYPE_F32;
|
||||
if (!force_clip_f32) {
|
||||
auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
|
||||
std::set<ggml_type> allow_types = {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
|
||||
if (tensor_type != tensor_types.end() && allow_types.find(tensor_type->second) != allow_types.end()) {
|
||||
token_wtype = tensor_type->second;
|
||||
}
|
||||
}
|
||||
enum ggml_type position_wtype = GGML_TYPE_F32;
|
||||
params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
|
||||
params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
|
||||
}
|
||||
|
|
@ -562,10 +567,12 @@ protected:
|
|||
public:
|
||||
CLIPEmbeddings(int64_t embed_dim,
|
||||
int64_t vocab_size = 49408,
|
||||
int64_t num_positions = 77)
|
||||
int64_t num_positions = 77,
|
||||
bool force_clip_f32 = false)
|
||||
: embed_dim(embed_dim),
|
||||
vocab_size(vocab_size),
|
||||
num_positions(num_positions) {
|
||||
num_positions(num_positions),
|
||||
force_clip_f32(force_clip_f32) {
|
||||
}
|
||||
|
||||
struct ggml_tensor* get_token_embed_weight() {
|
||||
|
|
@ -680,12 +687,11 @@ public:
|
|||
int32_t n_head = 12;
|
||||
int32_t n_layer = 12; // num_hidden_layers
|
||||
int32_t projection_dim = 1280; // only for OPEN_CLIP_VIT_BIGG_14
|
||||
int32_t clip_skip = -1;
|
||||
bool with_final_ln = true;
|
||||
|
||||
CLIPTextModel(CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
||||
bool with_final_ln = true,
|
||||
int clip_skip_value = -1)
|
||||
bool force_clip_f32 = false)
|
||||
: version(version), with_final_ln(with_final_ln) {
|
||||
if (version == OPEN_CLIP_VIT_H_14) {
|
||||
hidden_size = 1024;
|
||||
|
|
@ -698,37 +704,31 @@ public:
|
|||
n_head = 20;
|
||||
n_layer = 32;
|
||||
}
|
||||
set_clip_skip(clip_skip_value);
|
||||
|
||||
blocks["embeddings"] = std::shared_ptr<GGMLBlock>(new CLIPEmbeddings(hidden_size, vocab_size, n_token));
|
||||
blocks["embeddings"] = std::shared_ptr<GGMLBlock>(new CLIPEmbeddings(hidden_size, vocab_size, n_token, force_clip_f32));
|
||||
blocks["encoder"] = std::shared_ptr<GGMLBlock>(new CLIPEncoder(n_layer, hidden_size, n_head, intermediate_size));
|
||||
blocks["final_layer_norm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size));
|
||||
}
|
||||
|
||||
void set_clip_skip(int skip) {
|
||||
if (skip <= 0) {
|
||||
skip = -1;
|
||||
}
|
||||
clip_skip = skip;
|
||||
}
|
||||
|
||||
struct ggml_tensor* get_token_embed_weight() {
|
||||
auto embeddings = std::dynamic_pointer_cast<CLIPEmbeddings>(blocks["embeddings"]);
|
||||
return embeddings->get_token_embed_weight();
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* tkn_embeddings,
|
||||
size_t max_token_idx = 0,
|
||||
bool return_pooled = false) {
|
||||
bool return_pooled = false,
|
||||
int clip_skip = -1) {
|
||||
// input_ids: [N, n_token]
|
||||
auto embeddings = std::dynamic_pointer_cast<CLIPEmbeddings>(blocks["embeddings"]);
|
||||
auto encoder = std::dynamic_pointer_cast<CLIPEncoder>(blocks["encoder"]);
|
||||
auto final_layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["final_layer_norm"]);
|
||||
|
||||
auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size]
|
||||
x = encoder->forward(ctx, x, return_pooled ? -1 : clip_skip, true);
|
||||
x = encoder->forward(ctx, backend, x, return_pooled ? -1 : clip_skip, true);
|
||||
if (return_pooled || with_final_ln) {
|
||||
x = final_layer_norm->forward(ctx, x);
|
||||
}
|
||||
|
|
@ -739,7 +739,7 @@ public:
|
|||
if (text_projection != NULL) {
|
||||
pooled = ggml_nn_linear(ctx, pooled, text_projection, NULL);
|
||||
} else {
|
||||
LOG_DEBUG("Missing text_projection matrix, assuming identity...");
|
||||
LOG_DEBUG("identity projection");
|
||||
}
|
||||
return pooled; // [hidden_size, 1, 1]
|
||||
}
|
||||
|
|
@ -780,7 +780,11 @@ public:
|
|||
blocks["post_layernorm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* pixel_values, bool return_pooled = true) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* pixel_values,
|
||||
bool return_pooled = true,
|
||||
int clip_skip = -1) {
|
||||
// pixel_values: [N, num_channels, image_size, image_size]
|
||||
auto embeddings = std::dynamic_pointer_cast<CLIPVisionEmbeddings>(blocks["embeddings"]);
|
||||
auto pre_layernorm = std::dynamic_pointer_cast<LayerNorm>(blocks["pre_layernorm"]);
|
||||
|
|
@ -789,7 +793,7 @@ public:
|
|||
|
||||
auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim]
|
||||
x = pre_layernorm->forward(ctx, x);
|
||||
x = encoder->forward(ctx, x, -1, false);
|
||||
x = encoder->forward(ctx, backend, x, clip_skip, false);
|
||||
// print_ggml_tensor(x, true, "ClipVisionModel x: ");
|
||||
auto last_hidden_state = x;
|
||||
x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size]
|
||||
|
|
@ -857,16 +861,23 @@ public:
|
|||
blocks["visual_projection"] = std::shared_ptr<GGMLBlock>(new CLIPProjection(hidden_size, projection_dim, transpose_proj_w));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* pixel_values) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* pixel_values,
|
||||
bool return_pooled = true,
|
||||
int clip_skip = -1) {
|
||||
// pixel_values: [N, num_channels, image_size, image_size]
|
||||
// return: [N, projection_dim]
|
||||
// return: [N, projection_dim] if return_pooled else [N, n_token, hidden_size]
|
||||
auto vision_model = std::dynamic_pointer_cast<CLIPVisionModel>(blocks["vision_model"]);
|
||||
auto visual_projection = std::dynamic_pointer_cast<CLIPProjection>(blocks["visual_projection"]);
|
||||
|
||||
auto x = vision_model->forward(ctx, pixel_values); // [N, hidden_size]
|
||||
x = visual_projection->forward(ctx, x); // [N, projection_dim]
|
||||
auto x = vision_model->forward(ctx, backend, pixel_values, return_pooled, clip_skip); // [N, hidden_size] or [N, n_token, hidden_size]
|
||||
|
||||
return x; // [N, projection_dim]
|
||||
if (return_pooled) {
|
||||
x = visual_projection->forward(ctx, x); // [N, projection_dim]
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -874,12 +885,13 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
CLIPTextModel model;
|
||||
|
||||
CLIPTextModelRunner(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types,
|
||||
const std::string prefix,
|
||||
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
||||
bool with_final_ln = true,
|
||||
int clip_skip_value = -1)
|
||||
: GGMLRunner(backend), model(version, with_final_ln, clip_skip_value) {
|
||||
bool force_clip_f32 = false)
|
||||
: GGMLRunner(backend, offload_params_to_cpu), model(version, with_final_ln, force_clip_f32) {
|
||||
model.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -887,19 +899,17 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
return "clip";
|
||||
}
|
||||
|
||||
void set_clip_skip(int clip_skip) {
|
||||
model.set_clip_skip(clip_skip);
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
||||
model.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* embeddings,
|
||||
size_t max_token_idx = 0,
|
||||
bool return_pooled = false) {
|
||||
bool return_pooled = false,
|
||||
int clip_skip = -1) {
|
||||
size_t N = input_ids->ne[1];
|
||||
size_t n_token = input_ids->ne[0];
|
||||
if (input_ids->ne[0] > model.n_token) {
|
||||
|
|
@ -907,14 +917,15 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
input_ids = ggml_reshape_2d(ctx, input_ids, model.n_token, input_ids->ne[0] / model.n_token);
|
||||
}
|
||||
|
||||
return model.forward(ctx, input_ids, embeddings, max_token_idx, return_pooled);
|
||||
return model.forward(ctx, backend, input_ids, embeddings, max_token_idx, return_pooled, clip_skip);
|
||||
}
|
||||
|
||||
struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids,
|
||||
int num_custom_embeddings = 0,
|
||||
void* custom_embeddings_data = NULL,
|
||||
size_t max_token_idx = 0,
|
||||
bool return_pooled = false) {
|
||||
bool return_pooled = false,
|
||||
int clip_skip = -1) {
|
||||
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
||||
|
||||
input_ids = to_backend(input_ids);
|
||||
|
|
@ -933,7 +944,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 1);
|
||||
}
|
||||
|
||||
struct ggml_tensor* hidden_states = forward(compute_ctx, input_ids, embeddings, max_token_idx, return_pooled);
|
||||
struct ggml_tensor* hidden_states = forward(compute_ctx, runtime_backend, input_ids, embeddings, max_token_idx, return_pooled, clip_skip);
|
||||
|
||||
ggml_build_forward_expand(gf, hidden_states);
|
||||
|
||||
|
|
@ -946,10 +957,11 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
void* custom_embeddings_data,
|
||||
size_t max_token_idx,
|
||||
bool return_pooled,
|
||||
int clip_skip,
|
||||
ggml_tensor** output,
|
||||
ggml_context* output_ctx = NULL) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled);
|
||||
return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
|
||||
};
|
||||
GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -270,7 +270,10 @@ public:
|
|||
// to_out_1 is nn.Dropout(), skip for inference
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* context) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context) {
|
||||
// x: [N, n_token, query_dim]
|
||||
// context: [N, n_context, context_dim]
|
||||
// return: [N, n_token, query_dim]
|
||||
|
|
@ -288,7 +291,7 @@ public:
|
|||
auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim]
|
||||
auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim]
|
||||
|
||||
x = ggml_nn_attention_ext(ctx, q, k, v, n_head, NULL, false, false, flash_attn); // [N, n_token, inner_dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, NULL, false, false, flash_attn); // [N, n_token, inner_dim]
|
||||
|
||||
x = to_out_0->forward(ctx, x); // [N, n_token, query_dim]
|
||||
return x;
|
||||
|
|
@ -327,7 +330,10 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* context) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context) {
|
||||
// x: [N, n_token, query_dim]
|
||||
// context: [N, n_context, context_dim]
|
||||
// return: [N, n_token, query_dim]
|
||||
|
|
@ -352,11 +358,11 @@ public:
|
|||
|
||||
auto r = x;
|
||||
x = norm1->forward(ctx, x);
|
||||
x = attn1->forward(ctx, x, x); // self-attention
|
||||
x = attn1->forward(ctx, backend, x, x); // self-attention
|
||||
x = ggml_add(ctx, x, r);
|
||||
r = x;
|
||||
x = norm2->forward(ctx, x);
|
||||
x = attn2->forward(ctx, x, context); // cross-attention
|
||||
x = attn2->forward(ctx, backend, x, context); // cross-attention
|
||||
x = ggml_add(ctx, x, r);
|
||||
r = x;
|
||||
x = norm3->forward(ctx, x);
|
||||
|
|
@ -401,7 +407,10 @@ public:
|
|||
blocks["proj_out"] = std::shared_ptr<GGMLBlock>(new Conv2d(inner_dim, in_channels, {1, 1}));
|
||||
}
|
||||
|
||||
virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* context) {
|
||||
virtual struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context) {
|
||||
// x: [N, in_channels, h, w]
|
||||
// context: [N, max_position(aka n_token), hidden_size(aka context_dim)]
|
||||
auto norm = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm"]);
|
||||
|
|
@ -424,7 +433,7 @@ public:
|
|||
std::string name = "transformer_blocks." + std::to_string(i);
|
||||
auto transformer_block = std::dynamic_pointer_cast<BasicTransformerBlock>(blocks[name]);
|
||||
|
||||
x = transformer_block->forward(ctx, x, context);
|
||||
x = transformer_block->forward(ctx, backend, x, context);
|
||||
}
|
||||
|
||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 1, 0, 2, 3)); // [N, inner_dim, h * w]
|
||||
|
|
|
|||
|
|
@ -21,12 +21,12 @@ struct Conditioner {
|
|||
int clip_skip,
|
||||
int width,
|
||||
int height,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) = 0;
|
||||
virtual void alloc_params_buffer() = 0;
|
||||
virtual void free_params_buffer() = 0;
|
||||
virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) = 0;
|
||||
virtual size_t get_params_buffer_size() = 0;
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) = 0;
|
||||
virtual void alloc_params_buffer() = 0;
|
||||
virtual void free_params_buffer() = 0;
|
||||
virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) = 0;
|
||||
virtual size_t get_params_buffer_size() = 0;
|
||||
virtual std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(ggml_context* work_ctx,
|
||||
int n_threads,
|
||||
const std::string& text,
|
||||
|
|
@ -34,10 +34,10 @@ struct Conditioner {
|
|||
int width,
|
||||
int height,
|
||||
int num_input_imgs,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) = 0;
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) = 0;
|
||||
virtual std::string remove_trigger_from_prompt(ggml_context* work_ctx,
|
||||
const std::string& prompt) = 0;
|
||||
const std::string& prompt) = 0;
|
||||
};
|
||||
|
||||
// ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
||||
|
|
@ -57,33 +57,20 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
std::vector<std::string> readed_embeddings;
|
||||
|
||||
FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types,
|
||||
const std::string& embd_dir,
|
||||
SDVersion version = VERSION_SD1,
|
||||
PMVersion pv = PM_VERSION_1,
|
||||
int clip_skip = -1)
|
||||
PMVersion pv = PM_VERSION_1)
|
||||
: version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407), embd_dir(embd_dir) {
|
||||
bool force_clip_f32 = embd_dir.size() > 0;
|
||||
if (sd_version_is_sd1(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14);
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, force_clip_f32);
|
||||
} else if (sd_version_is_sd2(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14);
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, true, force_clip_f32);
|
||||
} else if (sd_version_is_sdxl(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
|
||||
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
|
||||
}
|
||||
set_clip_skip(clip_skip);
|
||||
}
|
||||
|
||||
void set_clip_skip(int clip_skip) {
|
||||
if (clip_skip <= 0) {
|
||||
clip_skip = 1;
|
||||
if (sd_version_is_sd2(version) || sd_version_is_sdxl(version)) {
|
||||
clip_skip = 2;
|
||||
}
|
||||
}
|
||||
text_model->set_clip_skip(clip_skip);
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->set_clip_skip(clip_skip);
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, force_clip_f32);
|
||||
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, force_clip_f32);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -128,7 +115,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
return true;
|
||||
}
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = 10 * 1024 * 1024; // max for custom embeddings 10 MB
|
||||
params.mem_size = 100 * 1024 * 1024; // max for custom embeddings 100 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* embd_ctx = ggml_init(params);
|
||||
|
|
@ -154,7 +141,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
}
|
||||
return true;
|
||||
};
|
||||
model_loader.load_tensors(on_load, NULL);
|
||||
model_loader.load_tensors(on_load, 1);
|
||||
readed_embeddings.push_back(embd_name);
|
||||
if (embd) {
|
||||
int64_t hidden_size = text_model->model.hidden_size;
|
||||
|
|
@ -409,9 +396,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
int clip_skip,
|
||||
int width,
|
||||
int height,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
set_clip_skip(clip_skip);
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, hidden_size]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, hidden_size] or [n_token, hidden_size + hidden_size2]
|
||||
|
|
@ -420,6 +406,10 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
struct ggml_tensor* pooled = NULL;
|
||||
std::vector<float> hidden_states_vec;
|
||||
|
||||
if (clip_skip <= 0) {
|
||||
clip_skip = (sd_version_is_sd2(version) || sd_version_is_sdxl(version)) ? 2 : 1;
|
||||
}
|
||||
|
||||
size_t chunk_len = 77;
|
||||
size_t chunk_count = tokens.size() / chunk_len;
|
||||
for (int chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
|
||||
|
|
@ -454,6 +444,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
token_embed_custom.data(),
|
||||
max_token_idx,
|
||||
false,
|
||||
clip_skip,
|
||||
&chunk_hidden_states1,
|
||||
work_ctx);
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
|
|
@ -463,6 +454,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
token_embed_custom.data(),
|
||||
max_token_idx,
|
||||
false,
|
||||
clip_skip,
|
||||
&chunk_hidden_states2, work_ctx);
|
||||
// concat
|
||||
chunk_hidden_states = ggml_tensor_concat(work_ctx, chunk_hidden_states1, chunk_hidden_states2, 0);
|
||||
|
|
@ -474,6 +466,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
token_embed_custom.data(),
|
||||
max_token_idx,
|
||||
true,
|
||||
clip_skip,
|
||||
&pooled,
|
||||
work_ctx);
|
||||
}
|
||||
|
|
@ -499,7 +492,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
float new_mean = ggml_tensor_mean(result);
|
||||
ggml_tensor_scale(result, (original_mean / new_mean));
|
||||
}
|
||||
if (force_zero_embeddings) {
|
||||
if (zero_out_masked) {
|
||||
float* vec = (float*)result->data;
|
||||
for (int i = 0; i < ggml_nelements(result); i++) {
|
||||
vec[i] = 0;
|
||||
|
|
@ -562,8 +555,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
int width,
|
||||
int height,
|
||||
int num_input_imgs,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
auto image_tokens = convert_token_to_id(trigger_word);
|
||||
// if(image_tokens.size() == 1){
|
||||
// printf(" image token id is: %d \n", image_tokens[0]);
|
||||
|
|
@ -584,7 +577,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
// for(int i = 0; i < clsm.size(); ++i)
|
||||
// printf("%d ", clsm[i]?1:0);
|
||||
// printf("\n");
|
||||
auto cond = get_learned_condition_common(work_ctx, n_threads, tokens, weights, clip_skip, width, height, adm_in_channels, force_zero_embeddings);
|
||||
auto cond = get_learned_condition_common(work_ctx, n_threads, tokens, weights, clip_skip, width, height, adm_in_channels, zero_out_masked);
|
||||
return std::make_tuple(cond, clsm);
|
||||
}
|
||||
|
||||
|
|
@ -606,20 +599,22 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
int clip_skip,
|
||||
int width,
|
||||
int height,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
auto tokens_and_weights = tokenize(text, true);
|
||||
std::vector<int>& tokens = tokens_and_weights.first;
|
||||
std::vector<float>& weights = tokens_and_weights.second;
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens, weights, clip_skip, width, height, adm_in_channels, force_zero_embeddings);
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens, weights, clip_skip, width, height, adm_in_channels, zero_out_masked);
|
||||
}
|
||||
};
|
||||
|
||||
struct FrozenCLIPVisionEmbedder : public GGMLRunner {
|
||||
CLIPVisionModelProjection vision_model;
|
||||
|
||||
FrozenCLIPVisionEmbedder(ggml_backend_t backend, const String2GGMLType& tensor_types = {})
|
||||
: vision_model(OPEN_CLIP_VIT_H_14, true), GGMLRunner(backend) {
|
||||
FrozenCLIPVisionEmbedder(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {})
|
||||
: vision_model(OPEN_CLIP_VIT_H_14), GGMLRunner(backend, offload_params_to_cpu) {
|
||||
vision_model.init(params_ctx, tensor_types, "cond_stage_model.transformer");
|
||||
}
|
||||
|
||||
|
|
@ -631,12 +626,12 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
|
|||
vision_model.get_param_tensors(tensors, "cond_stage_model.transformer");
|
||||
}
|
||||
|
||||
struct ggml_cgraph* build_graph(struct ggml_tensor* pixel_values) {
|
||||
struct ggml_cgraph* build_graph(struct ggml_tensor* pixel_values, bool return_pooled, int clip_skip) {
|
||||
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
||||
|
||||
pixel_values = to_backend(pixel_values);
|
||||
|
||||
struct ggml_tensor* hidden_states = vision_model.forward(compute_ctx, pixel_values);
|
||||
struct ggml_tensor* hidden_states = vision_model.forward(compute_ctx, runtime_backend, pixel_values, return_pooled, clip_skip);
|
||||
|
||||
ggml_build_forward_expand(gf, hidden_states);
|
||||
|
||||
|
|
@ -645,10 +640,12 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
|
|||
|
||||
void compute(const int n_threads,
|
||||
ggml_tensor* pixel_values,
|
||||
bool return_pooled,
|
||||
int clip_skip,
|
||||
ggml_tensor** output,
|
||||
ggml_context* output_ctx) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(pixel_values);
|
||||
return build_graph(pixel_values, return_pooled, clip_skip);
|
||||
};
|
||||
GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
|
||||
}
|
||||
|
|
@ -663,21 +660,12 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
std::shared_ptr<T5Runner> t5;
|
||||
|
||||
SD3CLIPEmbedder(ggml_backend_t backend,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
int clip_skip = -1)
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {})
|
||||
: clip_g_tokenizer(0) {
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
|
||||
clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
|
||||
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
|
||||
set_clip_skip(clip_skip);
|
||||
}
|
||||
|
||||
void set_clip_skip(int clip_skip) {
|
||||
if (clip_skip <= 0) {
|
||||
clip_skip = 2;
|
||||
}
|
||||
clip_l->set_clip_skip(clip_skip);
|
||||
clip_g->set_clip_skip(clip_skip);
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
|
||||
clip_g = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
|
||||
t5 = std::make_shared<T5Runner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
|
|
@ -773,8 +761,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
int n_threads,
|
||||
std::vector<std::pair<std::vector<int>, std::vector<float>>> token_and_weights,
|
||||
int clip_skip,
|
||||
bool force_zero_embeddings = false) {
|
||||
set_clip_skip(clip_skip);
|
||||
bool zero_out_masked = false) {
|
||||
auto& clip_l_tokens = token_and_weights[0].first;
|
||||
auto& clip_l_weights = token_and_weights[0].second;
|
||||
auto& clip_g_tokens = token_and_weights[1].first;
|
||||
|
|
@ -782,6 +769,10 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
auto& t5_tokens = token_and_weights[2].first;
|
||||
auto& t5_weights = token_and_weights[2].second;
|
||||
|
||||
if (clip_skip <= 0) {
|
||||
clip_skip = 2;
|
||||
}
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token*2, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token*2, 4096]
|
||||
|
|
@ -812,6 +803,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
NULL,
|
||||
max_token_idx,
|
||||
false,
|
||||
clip_skip,
|
||||
&chunk_hidden_states_l,
|
||||
work_ctx);
|
||||
{
|
||||
|
|
@ -839,6 +831,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
NULL,
|
||||
max_token_idx,
|
||||
true,
|
||||
clip_skip,
|
||||
&pooled_l,
|
||||
work_ctx);
|
||||
}
|
||||
|
|
@ -860,6 +853,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
NULL,
|
||||
max_token_idx,
|
||||
false,
|
||||
clip_skip,
|
||||
&chunk_hidden_states_g,
|
||||
work_ctx);
|
||||
|
||||
|
|
@ -888,6 +882,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
NULL,
|
||||
max_token_idx,
|
||||
true,
|
||||
clip_skip,
|
||||
&pooled_g,
|
||||
work_ctx);
|
||||
}
|
||||
|
|
@ -952,7 +947,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
|
||||
int64_t t1 = ggml_time_ms();
|
||||
LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
|
||||
if (force_zero_embeddings) {
|
||||
if (zero_out_masked) {
|
||||
float* vec = (float*)chunk_hidden_states->data;
|
||||
for (int i = 0; i < ggml_nelements(chunk_hidden_states); i++) {
|
||||
vec[i] = 0;
|
||||
|
|
@ -978,10 +973,10 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
int clip_skip,
|
||||
int width,
|
||||
int height,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
auto tokens_and_weights = tokenize(text, 77, true);
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, force_zero_embeddings);
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, zero_out_masked);
|
||||
}
|
||||
|
||||
std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(ggml_context* work_ctx,
|
||||
|
|
@ -991,8 +986,8 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
int width,
|
||||
int height,
|
||||
int num_input_imgs,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
GGML_ASSERT(0 && "Not implemented yet!");
|
||||
}
|
||||
|
||||
|
|
@ -1010,18 +1005,10 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
size_t chunk_len = 256;
|
||||
|
||||
FluxCLIPEmbedder(ggml_backend_t backend,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
int clip_skip = -1) {
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
|
||||
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
|
||||
set_clip_skip(clip_skip);
|
||||
}
|
||||
|
||||
void set_clip_skip(int clip_skip) {
|
||||
if (clip_skip <= 0) {
|
||||
clip_skip = 2;
|
||||
}
|
||||
clip_l->set_clip_skip(clip_skip);
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {}) {
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
|
||||
t5 = std::make_shared<T5Runner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
|
|
@ -1101,13 +1088,16 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
int n_threads,
|
||||
std::vector<std::pair<std::vector<int>, std::vector<float>>> token_and_weights,
|
||||
int clip_skip,
|
||||
bool force_zero_embeddings = false) {
|
||||
set_clip_skip(clip_skip);
|
||||
bool zero_out_masked = false) {
|
||||
auto& clip_l_tokens = token_and_weights[0].first;
|
||||
auto& clip_l_weights = token_and_weights[0].second;
|
||||
auto& t5_tokens = token_and_weights[1].first;
|
||||
auto& t5_weights = token_and_weights[1].second;
|
||||
|
||||
if (clip_skip <= 0) {
|
||||
clip_skip = 2;
|
||||
}
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
|
||||
|
|
@ -1136,6 +1126,7 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
NULL,
|
||||
max_token_idx,
|
||||
true,
|
||||
clip_skip,
|
||||
&pooled,
|
||||
work_ctx);
|
||||
}
|
||||
|
|
@ -1173,7 +1164,7 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
|
||||
int64_t t1 = ggml_time_ms();
|
||||
LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
|
||||
if (force_zero_embeddings) {
|
||||
if (zero_out_masked) {
|
||||
float* vec = (float*)chunk_hidden_states->data;
|
||||
for (int i = 0; i < ggml_nelements(chunk_hidden_states); i++) {
|
||||
vec[i] = 0;
|
||||
|
|
@ -1199,10 +1190,10 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
int clip_skip,
|
||||
int width,
|
||||
int height,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
auto tokens_and_weights = tokenize(text, chunk_len, true);
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, force_zero_embeddings);
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, zero_out_masked);
|
||||
}
|
||||
|
||||
std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(ggml_context* work_ctx,
|
||||
|
|
@ -1212,8 +1203,8 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
int width,
|
||||
int height,
|
||||
int num_input_imgs,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
GGML_ASSERT(0 && "Not implemented yet!");
|
||||
}
|
||||
|
||||
|
|
@ -1223,23 +1214,22 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
}
|
||||
};
|
||||
|
||||
struct PixArtCLIPEmbedder : public Conditioner {
|
||||
struct T5CLIPEmbedder : public Conditioner {
|
||||
T5UniGramTokenizer t5_tokenizer;
|
||||
std::shared_ptr<T5Runner> t5;
|
||||
size_t chunk_len = 512;
|
||||
bool use_mask = false;
|
||||
int mask_pad = 1;
|
||||
bool is_umt5 = false;
|
||||
|
||||
PixArtCLIPEmbedder(ggml_backend_t backend,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
int clip_skip = -1,
|
||||
bool use_mask = false,
|
||||
int mask_pad = 1)
|
||||
: use_mask(use_mask), mask_pad(mask_pad) {
|
||||
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void set_clip_skip(int clip_skip) {
|
||||
T5CLIPEmbedder(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
bool use_mask = false,
|
||||
int mask_pad = 1,
|
||||
bool is_umt5 = false)
|
||||
: use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
|
||||
t5 = std::make_shared<T5Runner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.t5xxl.transformer", is_umt5);
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
|
|
@ -1317,16 +1307,16 @@ struct PixArtCLIPEmbedder : public Conditioner {
|
|||
int n_threads,
|
||||
std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> token_and_weights,
|
||||
int clip_skip,
|
||||
bool force_zero_embeddings = false) {
|
||||
bool zero_out_masked = false) {
|
||||
auto& t5_tokens = std::get<0>(token_and_weights);
|
||||
auto& t5_weights = std::get<1>(token_and_weights);
|
||||
auto& t5_attn_mask_vec = std::get<2>(token_and_weights);
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
|
||||
struct ggml_tensor* pooled = NULL; // [768,]
|
||||
struct ggml_tensor* t5_attn_mask = vector_to_ggml_tensor(work_ctx, t5_attn_mask_vec); // [768,]
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
|
||||
struct ggml_tensor* pooled = NULL;
|
||||
struct ggml_tensor* t5_attn_mask = vector_to_ggml_tensor(work_ctx, t5_attn_mask_vec); // [n_token]
|
||||
|
||||
std::vector<float> hidden_states_vec;
|
||||
|
||||
|
|
@ -1367,10 +1357,16 @@ struct PixArtCLIPEmbedder : public Conditioner {
|
|||
|
||||
int64_t t1 = ggml_time_ms();
|
||||
LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
|
||||
if (force_zero_embeddings) {
|
||||
float* vec = (float*)chunk_hidden_states->data;
|
||||
for (int i = 0; i < ggml_nelements(chunk_hidden_states); i++) {
|
||||
vec[i] = 0;
|
||||
if (zero_out_masked) {
|
||||
auto tensor = chunk_hidden_states;
|
||||
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
|
||||
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
|
||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
||||
if (chunk_mask[i1] < 0.f) {
|
||||
ggml_tensor_set_f32(tensor, 0.f, i0, i1, i2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1379,16 +1375,12 @@ struct PixArtCLIPEmbedder : public Conditioner {
|
|||
((float*)chunk_hidden_states->data) + ggml_nelements(chunk_hidden_states));
|
||||
}
|
||||
|
||||
if (hidden_states_vec.size() > 0) {
|
||||
hidden_states = vector_to_ggml_tensor(work_ctx, hidden_states_vec);
|
||||
hidden_states = ggml_reshape_2d(work_ctx,
|
||||
hidden_states,
|
||||
chunk_hidden_states->ne[0],
|
||||
ggml_nelements(hidden_states) / chunk_hidden_states->ne[0]);
|
||||
} else {
|
||||
hidden_states = ggml_new_tensor_2d(work_ctx, GGML_TYPE_F32, 4096, 256);
|
||||
ggml_set_f32(hidden_states, 0.f);
|
||||
}
|
||||
GGML_ASSERT(hidden_states_vec.size() > 0);
|
||||
hidden_states = vector_to_ggml_tensor(work_ctx, hidden_states_vec);
|
||||
hidden_states = ggml_reshape_2d(work_ctx,
|
||||
hidden_states,
|
||||
chunk_hidden_states->ne[0],
|
||||
ggml_nelements(hidden_states) / chunk_hidden_states->ne[0]);
|
||||
|
||||
modify_mask_to_attend_padding(t5_attn_mask, ggml_nelements(t5_attn_mask), mask_pad);
|
||||
|
||||
|
|
@ -1401,10 +1393,10 @@ struct PixArtCLIPEmbedder : public Conditioner {
|
|||
int clip_skip,
|
||||
int width,
|
||||
int height,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
auto tokens_and_weights = tokenize(text, chunk_len, true);
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, force_zero_embeddings);
|
||||
return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, zero_out_masked);
|
||||
}
|
||||
|
||||
std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(ggml_context* work_ctx,
|
||||
|
|
@ -1414,8 +1406,8 @@ struct PixArtCLIPEmbedder : public Conditioner {
|
|||
int width,
|
||||
int height,
|
||||
int num_input_imgs,
|
||||
int adm_in_channels = -1,
|
||||
bool force_zero_embeddings = false) {
|
||||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
GGML_ASSERT(0 && "Not implemented yet!");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -174,10 +174,11 @@ public:
|
|||
|
||||
struct ggml_tensor* attention_layer_forward(std::string name,
|
||||
struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context) {
|
||||
auto block = std::dynamic_pointer_cast<SpatialTransformer>(blocks[name]);
|
||||
return block->forward(ctx, x, context);
|
||||
return block->forward(ctx, backend, x, context);
|
||||
}
|
||||
|
||||
struct ggml_tensor* input_hint_block_forward(struct ggml_context* ctx,
|
||||
|
|
@ -199,6 +200,7 @@ public:
|
|||
}
|
||||
|
||||
std::vector<struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* hint,
|
||||
struct ggml_tensor* guided_hint,
|
||||
|
|
@ -272,7 +274,7 @@ public:
|
|||
h = resblock_forward(name, ctx, h, emb); // [N, mult*model_channels, h, w]
|
||||
if (std::find(attention_resolutions.begin(), attention_resolutions.end(), ds) != attention_resolutions.end()) {
|
||||
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1";
|
||||
h = attention_layer_forward(name, ctx, h, context); // [N, mult*model_channels, h, w]
|
||||
h = attention_layer_forward(name, ctx, backend, h, context); // [N, mult*model_channels, h, w]
|
||||
}
|
||||
|
||||
auto zero_conv = std::dynamic_pointer_cast<Conv2d>(blocks["zero_convs." + std::to_string(input_block_idx) + ".0"]);
|
||||
|
|
@ -296,9 +298,9 @@ public:
|
|||
// [N, 4*model_channels, h/8, w/8]
|
||||
|
||||
// middle_block
|
||||
h = resblock_forward("middle_block.0", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]
|
||||
h = attention_layer_forward("middle_block.1", ctx, h, context); // [N, 4*model_channels, h/8, w/8]
|
||||
h = resblock_forward("middle_block.2", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]
|
||||
h = resblock_forward("middle_block.0", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]
|
||||
h = attention_layer_forward("middle_block.1", ctx, backend, h, context); // [N, 4*model_channels, h/8, w/8]
|
||||
h = resblock_forward("middle_block.2", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]
|
||||
|
||||
// out
|
||||
outs.push_back(middle_block_out->forward(ctx, h));
|
||||
|
|
@ -317,9 +319,10 @@ struct ControlNet : public GGMLRunner {
|
|||
bool guided_hint_cached = false;
|
||||
|
||||
ControlNet(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
SDVersion version = VERSION_SD1)
|
||||
: GGMLRunner(backend), control_net(version) {
|
||||
: GGMLRunner(backend, offload_params_to_cpu), control_net(version) {
|
||||
control_net.init(params_ctx, tensor_types, "");
|
||||
}
|
||||
|
||||
|
|
@ -357,7 +360,7 @@ struct ControlNet : public GGMLRunner {
|
|||
control_buffer_size += ggml_nbytes(controls[i]);
|
||||
}
|
||||
|
||||
control_buffer = ggml_backend_alloc_ctx_tensors(control_ctx, backend);
|
||||
control_buffer = ggml_backend_alloc_ctx_tensors(control_ctx, runtime_backend);
|
||||
|
||||
LOG_DEBUG("control buffer size %.2fMB", control_buffer_size * 1.f / 1024.f / 1024.f);
|
||||
}
|
||||
|
|
@ -402,6 +405,7 @@ struct ControlNet : public GGMLRunner {
|
|||
timesteps = to_backend(timesteps);
|
||||
|
||||
auto outs = control_net.forward(compute_ctx,
|
||||
runtime_backend,
|
||||
x,
|
||||
hint,
|
||||
guided_hint_cached ? guided_hint : NULL,
|
||||
|
|
@ -441,7 +445,7 @@ struct ControlNet : public GGMLRunner {
|
|||
guided_hint_cached = true;
|
||||
}
|
||||
|
||||
bool load_from_file(const std::string& file_path) {
|
||||
bool load_from_file(const std::string& file_path, int n_threads) {
|
||||
LOG_INFO("loading control net from '%s'", file_path.c_str());
|
||||
alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
|
|
@ -454,7 +458,7 @@ struct ControlNet : public GGMLRunner {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(tensors, backend, ignore_tensors);
|
||||
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load control net tensors from model loader failed");
|
||||
|
|
|
|||
|
|
@ -232,6 +232,25 @@ struct GITSSchedule : SigmaSchedule {
|
|||
}
|
||||
};
|
||||
|
||||
struct SGMUniformSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
|
||||
std::vector<float> result;
|
||||
if (n == 0) {
|
||||
result.push_back(0.0f);
|
||||
return result;
|
||||
}
|
||||
result.reserve(n + 1);
|
||||
int t_max = TIMESTEPS - 1;
|
||||
int t_min = 0;
|
||||
std::vector<float> timesteps = linear_space(static_cast<float>(t_max), static_cast<float>(t_min), n + 1);
|
||||
for (int i = 0; i < n; i++) {
|
||||
result.push_back(t_to_sigma_func(timesteps[i]));
|
||||
}
|
||||
result.push_back(0.0f);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
struct KarrasSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
||||
// These *COULD* be function arguments here,
|
||||
|
|
@ -251,8 +270,66 @@ struct KarrasSchedule : SigmaSchedule {
|
|||
}
|
||||
};
|
||||
|
||||
struct SimpleSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
|
||||
std::vector<float> result_sigmas;
|
||||
|
||||
if (n == 0) {
|
||||
return result_sigmas;
|
||||
}
|
||||
|
||||
result_sigmas.reserve(n + 1);
|
||||
|
||||
int model_sigmas_len = TIMESTEPS;
|
||||
|
||||
float step_factor = static_cast<float>(model_sigmas_len) / static_cast<float>(n);
|
||||
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
int offset_from_start_of_py_array = static_cast<int>(static_cast<float>(i) * step_factor);
|
||||
int timestep_index = model_sigmas_len - 1 - offset_from_start_of_py_array;
|
||||
|
||||
if (timestep_index < 0) {
|
||||
timestep_index = 0;
|
||||
}
|
||||
|
||||
result_sigmas.push_back(t_to_sigma(static_cast<float>(timestep_index)));
|
||||
}
|
||||
result_sigmas.push_back(0.0f);
|
||||
return result_sigmas;
|
||||
}
|
||||
};
|
||||
|
||||
// Close to Beta Schedule, but increadably simple in code.
|
||||
struct SmoothStepSchedule : SigmaSchedule {
|
||||
static constexpr float smoothstep(float x) {
|
||||
return x * x * (3.0f - 2.0f * x);
|
||||
}
|
||||
|
||||
std::vector<float> get_sigmas(uint32_t n, float /*sigma_min*/, float /*sigma_max*/, t_to_sigma_t t_to_sigma) override {
|
||||
std::vector<float> result;
|
||||
result.reserve(n + 1);
|
||||
|
||||
const int t_max = TIMESTEPS - 1;
|
||||
if (n == 0) {
|
||||
return result;
|
||||
} else if (n == 1) {
|
||||
result.push_back(t_to_sigma((float)t_max));
|
||||
result.push_back(0.f);
|
||||
return result;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < n; i++) {
|
||||
float u = 1.f - float(i) / float(n);
|
||||
result.push_back(t_to_sigma(std::round(smoothstep(u) * t_max)));
|
||||
}
|
||||
|
||||
result.push_back(0.f);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
struct Denoiser {
|
||||
std::shared_ptr<SigmaSchedule> schedule = std::make_shared<DiscreteSchedule>();
|
||||
std::shared_ptr<SigmaSchedule> scheduler = std::make_shared<DiscreteSchedule>();
|
||||
virtual float sigma_min() = 0;
|
||||
virtual float sigma_max() = 0;
|
||||
virtual float sigma_to_t(float sigma) = 0;
|
||||
|
|
@ -263,7 +340,7 @@ struct Denoiser {
|
|||
|
||||
virtual std::vector<float> get_sigmas(uint32_t n) {
|
||||
auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
|
||||
return schedule->get_sigmas(n, sigma_min(), sigma_max(), bound_t_to_sigma);
|
||||
return scheduler->get_sigmas(n, sigma_min(), sigma_max(), bound_t_to_sigma);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -349,7 +426,7 @@ struct EDMVDenoiser : public CompVisVDenoiser {
|
|||
|
||||
EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)
|
||||
: min_sigma(min_sigma), max_sigma(max_sigma) {
|
||||
schedule = std::make_shared<ExponentialSchedule>();
|
||||
scheduler = std::make_shared<ExponentialSchedule>();
|
||||
}
|
||||
|
||||
float t_to_sigma(float t) {
|
||||
|
|
@ -382,7 +459,8 @@ struct DiscreteFlowDenoiser : public Denoiser {
|
|||
|
||||
float sigma_data = 1.0f;
|
||||
|
||||
DiscreteFlowDenoiser() {
|
||||
DiscreteFlowDenoiser(float shift = 3.0f)
|
||||
: shift(shift) {
|
||||
set_parameters();
|
||||
}
|
||||
|
||||
|
|
@ -692,7 +770,6 @@ static void sample_k_diffusion(sample_method_t method,
|
|||
} break;
|
||||
case DPMPP2S_A: {
|
||||
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
|
||||
struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x);
|
||||
struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x);
|
||||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
|
|
@ -707,22 +784,15 @@ static void sample_k_diffusion(sample_method_t method,
|
|||
auto sigma_fn = [](float t) -> float { return exp(-t); };
|
||||
|
||||
if (sigma_down == 0) {
|
||||
// Euler step
|
||||
float* vec_d = (float*)d->data;
|
||||
// d = (x - denoised) / sigmas[i];
|
||||
// dt = sigma_down - sigmas[i];
|
||||
// x += d * dt;
|
||||
// => x = denoised
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
|
||||
for (int j = 0; j < ggml_nelements(d); j++) {
|
||||
vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i];
|
||||
}
|
||||
|
||||
// TODO: If sigma_down == 0, isn't this wrong?
|
||||
// But
|
||||
// https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py#L525
|
||||
// has this exactly the same way.
|
||||
float dt = sigma_down - sigmas[i];
|
||||
for (int j = 0; j < ggml_nelements(d); j++) {
|
||||
vec_x[j] = vec_x[j] + vec_d[j] * dt;
|
||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
||||
vec_x[j] = vec_denoised[j];
|
||||
}
|
||||
} else {
|
||||
// DPM-Solver++(2S)
|
||||
|
|
@ -731,7 +801,6 @@ static void sample_k_diffusion(sample_method_t method,
|
|||
float h = t_next - t;
|
||||
float s = t + 0.5f * h;
|
||||
|
||||
float* vec_d = (float*)d->data;
|
||||
float* vec_x = (float*)x->data;
|
||||
float* vec_x2 = (float*)x2->data;
|
||||
float* vec_denoised = (float*)denoised->data;
|
||||
|
|
|
|||
|
|
@ -4,22 +4,31 @@
|
|||
#include "flux.hpp"
|
||||
#include "mmdit.hpp"
|
||||
#include "unet.hpp"
|
||||
#include "wan.hpp"
|
||||
|
||||
struct DiffusionParams {
|
||||
struct ggml_tensor* x = NULL;
|
||||
struct ggml_tensor* timesteps = NULL;
|
||||
struct ggml_tensor* context = NULL;
|
||||
struct ggml_tensor* c_concat = NULL;
|
||||
struct ggml_tensor* y = NULL;
|
||||
struct ggml_tensor* guidance = NULL;
|
||||
std::vector<ggml_tensor*> ref_latents = {};
|
||||
bool increase_ref_index = false;
|
||||
int num_video_frames = -1;
|
||||
std::vector<struct ggml_tensor*> controls = {};
|
||||
float control_strength = 0.f;
|
||||
struct ggml_tensor* vace_context = NULL;
|
||||
float vace_strength = 1.f;
|
||||
std::vector<int> skip_layers = {};
|
||||
};
|
||||
|
||||
struct DiffusionModel {
|
||||
virtual std::string get_desc() = 0;
|
||||
virtual void compute(int n_threads,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* c_concat,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
int num_video_frames = -1,
|
||||
std::vector<struct ggml_tensor*> controls = {},
|
||||
float control_strength = 0.f,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) = 0;
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) = 0;
|
||||
virtual void alloc_params_buffer() = 0;
|
||||
virtual void free_params_buffer() = 0;
|
||||
virtual void free_compute_buffer() = 0;
|
||||
|
|
@ -32,10 +41,15 @@ struct UNetModel : public DiffusionModel {
|
|||
UNetModelRunner unet;
|
||||
|
||||
UNetModel(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
SDVersion version = VERSION_SD1,
|
||||
bool flash_attn = false)
|
||||
: unet(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
|
||||
: unet(backend, offload_params_to_cpu, tensor_types, "model.diffusion_model", version, flash_attn) {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
return unet.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
|
|
@ -63,21 +77,18 @@ struct UNetModel : public DiffusionModel {
|
|||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* c_concat,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
int num_video_frames = -1,
|
||||
std::vector<struct ggml_tensor*> controls = {},
|
||||
float control_strength = 0.f,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
(void)skip_layers; // SLG doesn't work with UNet models
|
||||
return unet.compute(n_threads, x, timesteps, context, c_concat, y, num_video_frames, controls, control_strength, output, output_ctx);
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
return unet.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
diffusion_params.context,
|
||||
diffusion_params.c_concat,
|
||||
diffusion_params.y,
|
||||
diffusion_params.num_video_frames,
|
||||
diffusion_params.controls,
|
||||
diffusion_params.control_strength, output, output_ctx);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -85,8 +96,14 @@ struct MMDiTModel : public DiffusionModel {
|
|||
MMDiTRunner mmdit;
|
||||
|
||||
MMDiTModel(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
bool flash_attn = false,
|
||||
const String2GGMLType& tensor_types = {})
|
||||
: mmdit(backend, tensor_types, "model.diffusion_model") {
|
||||
: mmdit(backend, offload_params_to_cpu, flash_attn, tensor_types, "model.diffusion_model") {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
return mmdit.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
|
|
@ -114,20 +131,17 @@ struct MMDiTModel : public DiffusionModel {
|
|||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* c_concat,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
int num_video_frames = -1,
|
||||
std::vector<struct ggml_tensor*> controls = {},
|
||||
float control_strength = 0.f,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
return mmdit.compute(n_threads, x, timesteps, context, y, output, output_ctx, skip_layers);
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
return mmdit.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
diffusion_params.context,
|
||||
diffusion_params.y,
|
||||
output,
|
||||
output_ctx,
|
||||
diffusion_params.skip_layers);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -135,11 +149,16 @@ struct FluxModel : public DiffusionModel {
|
|||
Flux::FluxRunner flux;
|
||||
|
||||
FluxModel(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
SDVersion version = VERSION_FLUX,
|
||||
bool flash_attn = false,
|
||||
bool use_mask = false)
|
||||
: flux(backend, tensor_types, "model.diffusion_model", version, flash_attn, use_mask) {
|
||||
: flux(backend, offload_params_to_cpu, tensor_types, "model.diffusion_model", version, flash_attn, use_mask) {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
return flux.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
|
|
@ -167,20 +186,80 @@ struct FluxModel : public DiffusionModel {
|
|||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* c_concat,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
int num_video_frames = -1,
|
||||
std::vector<struct ggml_tensor*> controls = {},
|
||||
float control_strength = 0.f,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
return flux.compute(n_threads, x, timesteps, context, c_concat, y, guidance, ref_latents, output, output_ctx, skip_layers);
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
return flux.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
diffusion_params.context,
|
||||
diffusion_params.c_concat,
|
||||
diffusion_params.y,
|
||||
diffusion_params.guidance,
|
||||
diffusion_params.ref_latents,
|
||||
diffusion_params.increase_ref_index,
|
||||
output,
|
||||
output_ctx,
|
||||
diffusion_params.skip_layers);
|
||||
}
|
||||
};
|
||||
|
||||
struct WanModel : public DiffusionModel {
|
||||
std::string prefix;
|
||||
WAN::WanRunner wan;
|
||||
|
||||
WanModel(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
const std::string prefix = "model.diffusion_model",
|
||||
SDVersion version = VERSION_WAN2,
|
||||
bool flash_attn = false)
|
||||
: prefix(prefix), wan(backend, offload_params_to_cpu, tensor_types, prefix, version, flash_attn) {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
return wan.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
wan.alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
wan.free_params_buffer();
|
||||
}
|
||||
|
||||
void free_compute_buffer() {
|
||||
wan.free_compute_buffer();
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
wan.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
return wan.get_params_buffer_size();
|
||||
}
|
||||
|
||||
int64_t get_adm_in_channels() {
|
||||
return 768;
|
||||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
return wan.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
diffusion_params.context,
|
||||
diffusion_params.y,
|
||||
diffusion_params.c_concat,
|
||||
NULL,
|
||||
diffusion_params.vace_context,
|
||||
diffusion_params.vace_strength,
|
||||
output,
|
||||
output_ctx);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -142,8 +142,10 @@ struct ESRGAN : public GGMLRunner {
|
|||
int scale = 4;
|
||||
int tile_size = 128; // avoid cuda OOM for 4gb VRAM
|
||||
|
||||
ESRGAN(ggml_backend_t backend, const String2GGMLType& tensor_types = {})
|
||||
: GGMLRunner(backend) {
|
||||
ESRGAN(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {})
|
||||
: GGMLRunner(backend, offload_params_to_cpu) {
|
||||
rrdb_net.init(params_ctx, tensor_types, "");
|
||||
}
|
||||
|
||||
|
|
@ -162,7 +164,7 @@ struct ESRGAN : public GGMLRunner {
|
|||
return "esrgan";
|
||||
}
|
||||
|
||||
bool load_from_file(const std::string& file_path) {
|
||||
bool load_from_file(const std::string& file_path, int n_threads) {
|
||||
LOG_INFO("loading esrgan from '%s'", file_path.c_str());
|
||||
|
||||
alloc_params_buffer();
|
||||
|
|
@ -175,7 +177,7 @@ struct ESRGAN : public GGMLRunner {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(esrgan_tensors, backend);
|
||||
bool success = model_loader.load_tensors(esrgan_tensors, {}, n_threads);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load esrgan tensors from model loader failed");
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "ggml_extend.hpp"
|
||||
#include "model.h"
|
||||
#include "rope.hpp"
|
||||
|
||||
#define FLUX_GRAPH_SIZE 10240
|
||||
|
||||
|
|
@ -113,6 +114,7 @@ namespace Flux {
|
|||
}
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* attention(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* q,
|
||||
struct ggml_tensor* k,
|
||||
struct ggml_tensor* v,
|
||||
|
|
@ -125,7 +127,7 @@ namespace Flux {
|
|||
q = apply_rope(ctx, q, pe); // [N*n_head, L, d_head]
|
||||
k = apply_rope(ctx, k, pe); // [N*n_head, L, d_head]
|
||||
|
||||
auto x = ggml_nn_attention_ext(ctx, q, k, v, v->ne[1], mask, false, true, flash_attn); // [N, L, n_head*d_head]
|
||||
auto x = ggml_nn_attention_ext(ctx, backend, q, k, v, v->ne[1], mask, false, true, flash_attn); // [N, L, n_head*d_head]
|
||||
return x;
|
||||
}
|
||||
|
||||
|
|
@ -168,13 +170,17 @@ namespace Flux {
|
|||
return x;
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* pe, struct ggml_tensor* mask) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* mask) {
|
||||
// x: [N, n_token, dim]
|
||||
// pe: [n_token, d_head/2, 2, 2]
|
||||
// return [N, n_token, dim]
|
||||
auto qkv = pre_attention(ctx, x); // q,k,v: [N, n_token, n_head, d_head]
|
||||
x = attention(ctx, qkv[0], qkv[1], qkv[2], pe, mask, flash_attn); // [N, n_token, dim]
|
||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||
auto qkv = pre_attention(ctx, x); // q,k,v: [N, n_token, n_head, d_head]
|
||||
x = attention(ctx, backend, qkv[0], qkv[1], qkv[2], pe, mask, flash_attn); // [N, n_token, dim]
|
||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
|
@ -298,6 +304,7 @@ namespace Flux {
|
|||
}
|
||||
|
||||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* img,
|
||||
struct ggml_tensor* txt,
|
||||
struct ggml_tensor* vec,
|
||||
|
|
@ -361,8 +368,8 @@ namespace Flux {
|
|||
auto k = ggml_concat(ctx, txt_k, img_k, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
||||
auto v = ggml_concat(ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
||||
|
||||
auto attn = attention(ctx, q, k, v, pe, mask, flash_attn); // [N, n_txt_token + n_img_token, n_head*d_head]
|
||||
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
|
||||
auto attn = attention(ctx, backend, q, k, v, pe, mask, flash_attn); // [N, n_txt_token + n_img_token, n_head*d_head]
|
||||
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
|
||||
auto txt_attn_out = ggml_view_3d(ctx,
|
||||
attn,
|
||||
attn->ne[0],
|
||||
|
|
@ -445,6 +452,7 @@ namespace Flux {
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* vec,
|
||||
struct ggml_tensor* pe,
|
||||
|
|
@ -495,7 +503,7 @@ namespace Flux {
|
|||
auto v = ggml_reshape_4d(ctx, qkv_vec[2], head_dim, num_heads, qkv_vec[2]->ne[1], qkv_vec[2]->ne[2]); // [N, n_token, n_head, d_head]
|
||||
q = norm->query_norm(ctx, q);
|
||||
k = norm->key_norm(ctx, k);
|
||||
auto attn = attention(ctx, q, k, v, pe, mask, flash_attn); // [N, n_token, hidden_size]
|
||||
auto attn = attention(ctx, backend, q, k, v, pe, mask, flash_attn); // [N, n_token, hidden_size]
|
||||
|
||||
auto attn_mlp = ggml_concat(ctx, attn, ggml_gelu_inplace(ctx, mlp), 0); // [N, n_token, hidden_size + mlp_hidden_dim]
|
||||
auto output = linear2->forward(ctx, attn_mlp); // [N, n_token, hidden_size]
|
||||
|
|
@ -610,179 +618,11 @@ namespace Flux {
|
|||
};
|
||||
|
||||
struct Flux : public GGMLBlock {
|
||||
public:
|
||||
std::vector<float> linspace(float start, float end, int num) {
|
||||
std::vector<float> result(num);
|
||||
float step = (end - start) / (num - 1);
|
||||
for (int i = 0; i < num; ++i) {
|
||||
result[i] = start + i * step;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> transpose(const std::vector<std::vector<float>>& mat) {
|
||||
int rows = mat.size();
|
||||
int cols = mat[0].size();
|
||||
std::vector<std::vector<float>> transposed(cols, std::vector<float>(rows));
|
||||
for (int i = 0; i < rows; ++i) {
|
||||
for (int j = 0; j < cols; ++j) {
|
||||
transposed[j][i] = mat[i][j];
|
||||
}
|
||||
}
|
||||
return transposed;
|
||||
}
|
||||
|
||||
std::vector<float> flatten(const std::vector<std::vector<float>>& vec) {
|
||||
std::vector<float> flat_vec;
|
||||
for (const auto& sub_vec : vec) {
|
||||
flat_vec.insert(flat_vec.end(), sub_vec.begin(), sub_vec.end());
|
||||
}
|
||||
return flat_vec;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> rope(const std::vector<float>& pos, int dim, int theta) {
|
||||
assert(dim % 2 == 0);
|
||||
int half_dim = dim / 2;
|
||||
|
||||
std::vector<float> scale = linspace(0, (dim * 1.0f - 2) / dim, half_dim);
|
||||
|
||||
std::vector<float> omega(half_dim);
|
||||
for (int i = 0; i < half_dim; ++i) {
|
||||
omega[i] = 1.0 / std::pow(theta, scale[i]);
|
||||
}
|
||||
|
||||
int pos_size = pos.size();
|
||||
std::vector<std::vector<float>> out(pos_size, std::vector<float>(half_dim));
|
||||
for (int i = 0; i < pos_size; ++i) {
|
||||
for (int j = 0; j < half_dim; ++j) {
|
||||
out[i][j] = pos[i] * omega[j];
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> result(pos_size, std::vector<float>(half_dim * 4));
|
||||
for (int i = 0; i < pos_size; ++i) {
|
||||
for (int j = 0; j < half_dim; ++j) {
|
||||
result[i][4 * j] = std::cos(out[i][j]);
|
||||
result[i][4 * j + 1] = -std::sin(out[i][j]);
|
||||
result[i][4 * j + 2] = std::sin(out[i][j]);
|
||||
result[i][4 * j + 3] = std::cos(out[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Generate IDs for image patches and text
|
||||
std::vector<std::vector<float>> gen_txt_ids(int bs, int context_len) {
|
||||
return std::vector<std::vector<float>>(bs * context_len, std::vector<float>(3, 0.0));
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> gen_img_ids(int h, int w, int patch_size, int bs, int index = 0, int h_offset = 0, int w_offset = 0) {
|
||||
int h_len = (h + (patch_size / 2)) / patch_size;
|
||||
int w_len = (w + (patch_size / 2)) / patch_size;
|
||||
|
||||
std::vector<std::vector<float>> img_ids(h_len * w_len, std::vector<float>(3, 0.0));
|
||||
|
||||
std::vector<float> row_ids = linspace(h_offset, h_len - 1 + h_offset, h_len);
|
||||
std::vector<float> col_ids = linspace(w_offset, w_len - 1 + w_offset, w_len);
|
||||
|
||||
for (int i = 0; i < h_len; ++i) {
|
||||
for (int j = 0; j < w_len; ++j) {
|
||||
img_ids[i * w_len + j][0] = index;
|
||||
img_ids[i * w_len + j][1] = row_ids[i];
|
||||
img_ids[i * w_len + j][2] = col_ids[j];
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> img_ids_repeated(bs * img_ids.size(), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < img_ids.size(); ++j) {
|
||||
img_ids_repeated[i * img_ids.size() + j] = img_ids[j];
|
||||
}
|
||||
}
|
||||
return img_ids_repeated;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> concat_ids(const std::vector<std::vector<float>>& a,
|
||||
const std::vector<std::vector<float>>& b,
|
||||
int bs) {
|
||||
size_t a_len = a.size() / bs;
|
||||
size_t b_len = b.size() / bs;
|
||||
std::vector<std::vector<float>> ids(a.size() + b.size(), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < a_len; ++j) {
|
||||
ids[i * (a_len + b_len) + j] = a[i * a_len + j];
|
||||
}
|
||||
for (int j = 0; j < b_len; ++j) {
|
||||
ids[i * (a_len + b_len) + a_len + j] = b[i * b_len + j];
|
||||
}
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> gen_ids(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents) {
|
||||
auto txt_ids = gen_txt_ids(bs, context_len);
|
||||
auto img_ids = gen_img_ids(h, w, patch_size, bs);
|
||||
|
||||
auto ids = concat_ids(txt_ids, img_ids, bs);
|
||||
uint64_t curr_h_offset = 0;
|
||||
uint64_t curr_w_offset = 0;
|
||||
for (ggml_tensor* ref : ref_latents) {
|
||||
uint64_t h_offset = 0;
|
||||
uint64_t w_offset = 0;
|
||||
if (ref->ne[1] + curr_h_offset > ref->ne[0] + curr_w_offset) {
|
||||
w_offset = curr_w_offset;
|
||||
} else {
|
||||
h_offset = curr_h_offset;
|
||||
}
|
||||
|
||||
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
|
||||
ids = concat_ids(ids, ref_ids, bs);
|
||||
|
||||
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
|
||||
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
// Generate positional embeddings
|
||||
std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
|
||||
std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
|
||||
std::vector<std::vector<float>> trans_ids = transpose(ids);
|
||||
size_t pos_len = ids.size();
|
||||
int num_axes = axes_dim.size();
|
||||
for (int i = 0; i < pos_len; i++) {
|
||||
// std::cout << trans_ids[0][i] << " " << trans_ids[1][i] << " " << trans_ids[2][i] << std::endl;
|
||||
}
|
||||
|
||||
int emb_dim = 0;
|
||||
for (int d : axes_dim)
|
||||
emb_dim += d / 2;
|
||||
|
||||
std::vector<std::vector<float>> emb(bs * pos_len, std::vector<float>(emb_dim * 2 * 2, 0.0));
|
||||
int offset = 0;
|
||||
for (int i = 0; i < num_axes; ++i) {
|
||||
std::vector<std::vector<float>> rope_emb = rope(trans_ids[i], axes_dim[i], theta); // [bs*pos_len, axes_dim[i]/2 * 2 * 2]
|
||||
for (int b = 0; b < bs; ++b) {
|
||||
for (int j = 0; j < pos_len; ++j) {
|
||||
for (int k = 0; k < rope_emb[0].size(); ++k) {
|
||||
emb[b * pos_len + j][offset + k] = rope_emb[j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
offset += rope_emb[0].size();
|
||||
}
|
||||
|
||||
return flatten(emb);
|
||||
}
|
||||
|
||||
public:
|
||||
FluxParams params;
|
||||
Flux() {}
|
||||
Flux(FluxParams params)
|
||||
: params(params) {
|
||||
int64_t pe_dim = params.hidden_size / params.num_heads;
|
||||
|
||||
blocks["img_in"] = std::shared_ptr<GGMLBlock>(new Linear(params.in_channels, params.hidden_size, true));
|
||||
if (params.is_chroma) {
|
||||
blocks["distilled_guidance_layer"] = std::shared_ptr<GGMLBlock>(new ChromaApproximator(params.in_channels, params.hidden_size));
|
||||
|
|
@ -866,6 +706,7 @@ namespace Flux {
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward_orig(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* img,
|
||||
struct ggml_tensor* txt,
|
||||
struct ggml_tensor* timesteps,
|
||||
|
|
@ -930,7 +771,7 @@ namespace Flux {
|
|||
|
||||
auto block = std::dynamic_pointer_cast<DoubleStreamBlock>(blocks["double_blocks." + std::to_string(i)]);
|
||||
|
||||
auto img_txt = block->forward(ctx, img, txt, vec, pe, txt_img_mask);
|
||||
auto img_txt = block->forward(ctx, backend, img, txt, vec, pe, txt_img_mask);
|
||||
img = img_txt.first; // [N, n_img_token, hidden_size]
|
||||
txt = img_txt.second; // [N, n_txt_token, hidden_size]
|
||||
}
|
||||
|
|
@ -942,7 +783,7 @@ namespace Flux {
|
|||
}
|
||||
auto block = std::dynamic_pointer_cast<SingleStreamBlock>(blocks["single_blocks." + std::to_string(i)]);
|
||||
|
||||
txt_img = block->forward(ctx, txt_img, vec, pe, txt_img_mask);
|
||||
txt_img = block->forward(ctx, backend, txt_img, vec, pe, txt_img_mask);
|
||||
}
|
||||
|
||||
txt_img = ggml_cont(ctx, ggml_permute(ctx, txt_img, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
|
||||
|
|
@ -975,6 +816,7 @@ namespace Flux {
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timestep,
|
||||
struct ggml_tensor* context,
|
||||
|
|
@ -1024,7 +866,7 @@ namespace Flux {
|
|||
}
|
||||
}
|
||||
|
||||
auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
|
||||
auto out = forward_orig(ctx, backend, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
|
||||
if (out->ne[1] > img_tokens) {
|
||||
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
|
||||
out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
|
||||
|
|
@ -1048,12 +890,13 @@ namespace Flux {
|
|||
bool use_mask = false;
|
||||
|
||||
FluxRunner(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_FLUX,
|
||||
bool flash_attn = false,
|
||||
bool use_mask = false)
|
||||
: GGMLRunner(backend), use_mask(use_mask) {
|
||||
: GGMLRunner(backend, offload_params_to_cpu), use_mask(use_mask) {
|
||||
flux_params.flash_attn = flash_attn;
|
||||
flux_params.guidance_embed = false;
|
||||
flux_params.depth = 0;
|
||||
|
|
@ -1063,7 +906,7 @@ namespace Flux {
|
|||
}
|
||||
for (auto pair : tensor_types) {
|
||||
std::string tensor_name = pair.first;
|
||||
if (tensor_name.find("model.diffusion_model.") == std::string::npos)
|
||||
if (!starts_with(tensor_name, prefix))
|
||||
continue;
|
||||
if (tensor_name.find("guidance_in.in_layer.weight") != std::string::npos) {
|
||||
// not schnell
|
||||
|
|
@ -1117,6 +960,7 @@ namespace Flux {
|
|||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
bool increase_ref_index = false,
|
||||
std::vector<int> skip_layers = {}) {
|
||||
GGML_ASSERT(x->ne[3] == 1);
|
||||
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
|
||||
|
|
@ -1150,7 +994,15 @@ namespace Flux {
|
|||
ref_latents[i] = to_backend(ref_latents[i]);
|
||||
}
|
||||
|
||||
pe_vec = flux.gen_pe(x->ne[1], x->ne[0], 2, x->ne[3], context->ne[1], ref_latents, flux_params.theta, flux_params.axes_dim);
|
||||
pe_vec = Rope::gen_flux_pe(x->ne[1],
|
||||
x->ne[0],
|
||||
2,
|
||||
x->ne[3],
|
||||
context->ne[1],
|
||||
ref_latents,
|
||||
increase_ref_index,
|
||||
flux_params.theta,
|
||||
flux_params.axes_dim);
|
||||
int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
|
||||
// LOG_DEBUG("pos_len %d", pos_len);
|
||||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len);
|
||||
|
|
@ -1160,6 +1012,7 @@ namespace Flux {
|
|||
set_backend_tensor_data(pe, pe_vec.data());
|
||||
|
||||
struct ggml_tensor* out = flux.forward(compute_ctx,
|
||||
runtime_backend,
|
||||
x,
|
||||
timesteps,
|
||||
context,
|
||||
|
|
@ -1184,6 +1037,7 @@ namespace Flux {
|
|||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
bool increase_ref_index = false,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
|
|
@ -1193,7 +1047,7 @@ namespace Flux {
|
|||
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
||||
// guidance: [N, ]
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(x, timesteps, context, c_concat, y, guidance, ref_latents, skip_layers);
|
||||
return build_graph(x, timesteps, context, c_concat, y, guidance, ref_latents, increase_ref_index, skip_layers);
|
||||
};
|
||||
|
||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||
|
|
@ -1233,7 +1087,7 @@ namespace Flux {
|
|||
struct ggml_tensor* out = NULL;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, timesteps, context, NULL, y, guidance, {}, &out, work_ctx);
|
||||
compute(8, x, timesteps, context, NULL, y, guidance, {}, false, &out, work_ctx);
|
||||
int t1 = ggml_time_ms();
|
||||
|
||||
print_ggml_tensor(out);
|
||||
|
|
@ -1245,7 +1099,7 @@ namespace Flux {
|
|||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||
std::shared_ptr<FluxRunner> flux = std::shared_ptr<FluxRunner>(new FluxRunner(backend));
|
||||
std::shared_ptr<FluxRunner> flux = std::shared_ptr<FluxRunner>(new FluxRunner(backend, false));
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
|
|
@ -1259,7 +1113,7 @@ namespace Flux {
|
|||
return;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(tensors, backend);
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
231
otherarch/sdcpp/gguf_reader.hpp
Normal file
231
otherarch/sdcpp/gguf_reader.hpp
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
#ifndef __GGUF_READER_HPP__
|
||||
#define __GGUF_READER_HPP__
|
||||
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ggml.h"
|
||||
#include "util.h"
|
||||
|
||||
struct GGUFTensorInfo {
|
||||
std::string name;
|
||||
ggml_type type;
|
||||
std::vector<int64_t> shape;
|
||||
size_t offset;
|
||||
};
|
||||
|
||||
enum class GGUFMetadataType : uint32_t {
|
||||
UINT8 = 0,
|
||||
INT8 = 1,
|
||||
UINT16 = 2,
|
||||
INT16 = 3,
|
||||
UINT32 = 4,
|
||||
INT32 = 5,
|
||||
FLOAT32 = 6,
|
||||
BOOL = 7,
|
||||
STRING = 8,
|
||||
ARRAY = 9,
|
||||
UINT64 = 10,
|
||||
INT64 = 11,
|
||||
FLOAT64 = 12,
|
||||
};
|
||||
|
||||
class GGUFReader {
|
||||
private:
|
||||
std::vector<GGUFTensorInfo> tensors_;
|
||||
size_t data_offset_;
|
||||
size_t alignment_ = 32; // default alignment is 32
|
||||
|
||||
template <typename T>
|
||||
bool safe_read(std::ifstream& fin, T& value) {
|
||||
fin.read(reinterpret_cast<char*>(&value), sizeof(T));
|
||||
return fin.good();
|
||||
}
|
||||
|
||||
bool safe_read(std::ifstream& fin, char* buffer, size_t size) {
|
||||
fin.read(buffer, size);
|
||||
return fin.good();
|
||||
}
|
||||
|
||||
bool safe_seek(std::ifstream& fin, std::streamoff offset, std::ios::seekdir dir) {
|
||||
fin.seekg(offset, dir);
|
||||
return fin.good();
|
||||
}
|
||||
|
||||
bool read_metadata(std::ifstream& fin) {
|
||||
uint64_t key_len = 0;
|
||||
if (!safe_read(fin, key_len))
|
||||
return false;
|
||||
|
||||
std::string key(key_len, '\0');
|
||||
if (!safe_read(fin, (char*)key.data(), key_len))
|
||||
return false;
|
||||
|
||||
uint32_t type = 0;
|
||||
if (!safe_read(fin, type))
|
||||
return false;
|
||||
|
||||
if (key == "general.alignment") {
|
||||
uint32_t align_val = 0;
|
||||
if (!safe_read(fin, align_val))
|
||||
return false;
|
||||
|
||||
if (align_val != 0 && (align_val & (align_val - 1)) == 0) {
|
||||
alignment_ = align_val;
|
||||
LOG_DEBUG("Found alignment: %zu", alignment_);
|
||||
} else {
|
||||
LOG_ERROR("Invalid alignment value %u, fallback to default %zu", align_val, alignment_);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (static_cast<GGUFMetadataType>(type)) {
|
||||
case GGUFMetadataType::UINT8:
|
||||
case GGUFMetadataType::INT8:
|
||||
case GGUFMetadataType::BOOL:
|
||||
return safe_seek(fin, 1, std::ios::cur);
|
||||
|
||||
case GGUFMetadataType::UINT16:
|
||||
case GGUFMetadataType::INT16:
|
||||
return safe_seek(fin, 2, std::ios::cur);
|
||||
|
||||
case GGUFMetadataType::UINT32:
|
||||
case GGUFMetadataType::INT32:
|
||||
case GGUFMetadataType::FLOAT32:
|
||||
return safe_seek(fin, 4, std::ios::cur);
|
||||
|
||||
case GGUFMetadataType::UINT64:
|
||||
case GGUFMetadataType::INT64:
|
||||
case GGUFMetadataType::FLOAT64:
|
||||
return safe_seek(fin, 8, std::ios::cur);
|
||||
|
||||
case GGUFMetadataType::STRING: {
|
||||
uint64_t len = 0;
|
||||
if (!safe_read(fin, len))
|
||||
return false;
|
||||
return safe_seek(fin, len, std::ios::cur);
|
||||
}
|
||||
|
||||
case GGUFMetadataType::ARRAY: {
|
||||
uint32_t elem_type = 0;
|
||||
uint64_t len = 0;
|
||||
if (!safe_read(fin, elem_type))
|
||||
return false;
|
||||
if (!safe_read(fin, len))
|
||||
return false;
|
||||
|
||||
for (uint64_t i = 0; i < len; i++) {
|
||||
if (!read_metadata(fin))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
LOG_ERROR("Unknown metadata type=%u", type);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
GGUFTensorInfo read_tensor_info(std::ifstream& fin) {
|
||||
GGUFTensorInfo info;
|
||||
|
||||
uint64_t name_len;
|
||||
if (!safe_read(fin, name_len))
|
||||
throw std::runtime_error("read tensor name length failed");
|
||||
|
||||
info.name.resize(name_len);
|
||||
if (!safe_read(fin, (char*)info.name.data(), name_len))
|
||||
throw std::runtime_error("read tensor name failed");
|
||||
|
||||
uint32_t n_dims;
|
||||
if (!safe_read(fin, n_dims))
|
||||
throw std::runtime_error("read tensor dims failed");
|
||||
|
||||
info.shape.resize(n_dims);
|
||||
for (uint32_t i = 0; i < n_dims; i++) {
|
||||
if (!safe_read(fin, info.shape[i]))
|
||||
throw std::runtime_error("read tensor shape failed");
|
||||
}
|
||||
|
||||
if (n_dims > GGML_MAX_DIMS) {
|
||||
for (int i = GGML_MAX_DIMS; i < n_dims; i++) {
|
||||
info.shape[GGML_MAX_DIMS - 1] *= info.shape[i]; // stack to last dim;
|
||||
}
|
||||
info.shape.resize(GGML_MAX_DIMS);
|
||||
n_dims = GGML_MAX_DIMS;
|
||||
}
|
||||
|
||||
uint32_t type;
|
||||
if (!safe_read(fin, type))
|
||||
throw std::runtime_error("read tensor type failed");
|
||||
info.type = static_cast<ggml_type>(type);
|
||||
|
||||
if (!safe_read(fin, info.offset))
|
||||
throw std::runtime_error("read tensor offset failed");
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
public:
|
||||
bool load(const std::string& file_path) {
|
||||
std::ifstream fin(file_path, std::ios::binary);
|
||||
if (!fin) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// --- Header ---
|
||||
char magic[4];
|
||||
if (!safe_read(fin, magic, 4) || strncmp(magic, "GGUF", 4) != 0) {
|
||||
LOG_ERROR("not a valid GGUF file");
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t version;
|
||||
if (!safe_read(fin, version))
|
||||
return false;
|
||||
|
||||
uint64_t tensor_count, metadata_kv_count;
|
||||
if (!safe_read(fin, tensor_count))
|
||||
return false;
|
||||
if (!safe_read(fin, metadata_kv_count))
|
||||
return false;
|
||||
|
||||
LOG_DEBUG("GGUF v%u, tensor_count=%llu, metadata_kv_count=%llu",
|
||||
version, (unsigned long long)tensor_count, (unsigned long long)metadata_kv_count);
|
||||
|
||||
// --- Read Metadata ---
|
||||
for (uint64_t i = 0; i < metadata_kv_count; i++) {
|
||||
if (!read_metadata(fin)) {
|
||||
LOG_ERROR("read meta data failed");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// --- Tensor Infos ---
|
||||
tensors_.clear();
|
||||
try {
|
||||
for (uint64_t i = 0; i < tensor_count; i++) {
|
||||
tensors_.push_back(read_tensor_info(fin));
|
||||
}
|
||||
} catch (const std::runtime_error& e) {
|
||||
LOG_ERROR("%s", e.what());
|
||||
return false;
|
||||
}
|
||||
|
||||
data_offset_ = static_cast<size_t>(fin.tellg());
|
||||
if ((data_offset_ % alignment_) != 0) {
|
||||
data_offset_ = ((data_offset_ + alignment_ - 1) / alignment_) * alignment_;
|
||||
}
|
||||
fin.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
const std::vector<GGUFTensorInfo>& tensors() const { return tensors_; }
|
||||
size_t data_offset() const { return data_offset_; }
|
||||
};
|
||||
|
||||
#endif // __GGUF_READER_HPP__
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef __LORA_HPP__
|
||||
#define __LORA_HPP__
|
||||
|
||||
#include <mutex>
|
||||
#include "ggml_extend.hpp"
|
||||
|
||||
#define LORA_GRAPH_BASE_SIZE 10240
|
||||
|
|
@ -58,6 +59,7 @@ struct LoraModel : public GGMLRunner {
|
|||
{"x_block.attn.proj", "attn.to_out.0"},
|
||||
{"x_block.attn2.proj", "attn2.to_out.0"},
|
||||
// flux
|
||||
{"img_in", "x_embedder"},
|
||||
// singlestream
|
||||
{"linear2", "proj_out"},
|
||||
{"modulation.lin", "norm.linear"},
|
||||
|
|
@ -92,6 +94,7 @@ struct LoraModel : public GGMLRunner {
|
|||
|
||||
float multiplier = 1.0f;
|
||||
std::map<std::string, struct ggml_tensor*> lora_tensors;
|
||||
std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor;
|
||||
std::string file_path;
|
||||
ModelLoader model_loader;
|
||||
bool load_failed = false;
|
||||
|
|
@ -103,7 +106,7 @@ struct LoraModel : public GGMLRunner {
|
|||
LoraModel(ggml_backend_t backend,
|
||||
const std::string& file_path = "",
|
||||
const std::string prefix = "")
|
||||
: file_path(file_path), GGMLRunner(backend) {
|
||||
: file_path(file_path), GGMLRunner(backend, false) {
|
||||
if (!model_loader.init_from_file(file_path, prefix)) {
|
||||
load_failed = true;
|
||||
}
|
||||
|
|
@ -113,7 +116,7 @@ struct LoraModel : public GGMLRunner {
|
|||
return "lora";
|
||||
}
|
||||
|
||||
bool load_from_file(bool filter_tensor = false) {
|
||||
bool load_from_file(bool filter_tensor, int n_threads) {
|
||||
LOG_INFO("loading LoRA from '%s'", file_path.c_str());
|
||||
|
||||
if (load_failed) {
|
||||
|
|
@ -121,41 +124,53 @@ struct LoraModel : public GGMLRunner {
|
|||
return false;
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, TensorStorage> tensors_to_create;
|
||||
std::mutex lora_mutex;
|
||||
bool dry_run = true;
|
||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||
const std::string& name = tensor_storage.name;
|
||||
if (dry_run) {
|
||||
const std::string& name = tensor_storage.name;
|
||||
|
||||
if (filter_tensor && !contains(name, "lora")) {
|
||||
// LOG_INFO("skipping LoRA tesnor '%s'", name.c_str());
|
||||
return true;
|
||||
}
|
||||
// LOG_INFO("%s", name.c_str());
|
||||
for (int i = 0; i < LORA_TYPE_COUNT; i++) {
|
||||
if (name.find(type_fingerprints[i]) != std::string::npos) {
|
||||
type = (lora_t)i;
|
||||
break;
|
||||
if (filter_tensor && !contains(name, "lora")) {
|
||||
return true;
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(lora_mutex);
|
||||
for (int i = 0; i < LORA_TYPE_COUNT; i++) {
|
||||
if (name.find(type_fingerprints[i]) != std::string::npos) {
|
||||
type = (lora_t)i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
tensors_to_create[name] = tensor_storage;
|
||||
}
|
||||
} else {
|
||||
const std::string& name = tensor_storage.name;
|
||||
auto iter = lora_tensors.find(name);
|
||||
if (iter != lora_tensors.end()) {
|
||||
*dst_tensor = iter->second;
|
||||
}
|
||||
}
|
||||
|
||||
if (dry_run) {
|
||||
struct ggml_tensor* real = ggml_new_tensor(params_ctx,
|
||||
tensor_storage.type,
|
||||
tensor_storage.n_dims,
|
||||
tensor_storage.ne);
|
||||
lora_tensors[name] = real;
|
||||
} else {
|
||||
auto real = lora_tensors[name];
|
||||
*dst_tensor = real;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
model_loader.load_tensors(on_new_tensor_cb, backend);
|
||||
model_loader.load_tensors(on_new_tensor_cb, n_threads);
|
||||
|
||||
for (const auto& pair : tensors_to_create) {
|
||||
const auto& name = pair.first;
|
||||
const auto& ts = pair.second;
|
||||
struct ggml_tensor* real = ggml_new_tensor(params_ctx,
|
||||
ts.type,
|
||||
ts.n_dims,
|
||||
ts.ne);
|
||||
lora_tensors[name] = real;
|
||||
}
|
||||
|
||||
alloc_params_buffer();
|
||||
// exit(0);
|
||||
|
||||
dry_run = false;
|
||||
model_loader.load_tensors(on_new_tensor_cb, backend);
|
||||
model_loader.load_tensors(on_new_tensor_cb, n_threads);
|
||||
|
||||
LOG_DEBUG("lora type: \"%s\"/\"%s\"", lora_downs[type].c_str(), lora_ups[type].c_str());
|
||||
|
||||
|
|
@ -167,6 +182,7 @@ struct LoraModel : public GGMLRunner {
|
|||
auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a));
|
||||
out = ggml_get_rows(ctx, out, zero_index);
|
||||
out = ggml_reshape(ctx, out, a);
|
||||
// auto out = ggml_cast(ctx, a, GGML_TYPE_F32);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
|
@ -245,14 +261,22 @@ struct LoraModel : public GGMLRunner {
|
|||
set_backend_tensor_data(zero_index, zero_index_vec.data());
|
||||
ggml_build_forward_expand(gf, zero_index);
|
||||
|
||||
original_tensor_to_final_tensor.clear();
|
||||
|
||||
std::set<std::string> applied_lora_tensors;
|
||||
for (auto it : model_tensors) {
|
||||
std::string k_tensor = it.first;
|
||||
struct ggml_tensor* weight = model_tensors[it.first];
|
||||
std::string model_tensor_name = it.first;
|
||||
struct ggml_tensor* model_tensor = model_tensors[it.first];
|
||||
|
||||
std::vector<std::string> keys = to_lora_keys(k_tensor, version);
|
||||
if (keys.size() == 0)
|
||||
continue;
|
||||
std::vector<std::string> keys = to_lora_keys(model_tensor_name, version);
|
||||
bool is_bias = ends_with(model_tensor_name, ".bias");
|
||||
if (keys.size() == 0) {
|
||||
if (is_bias) {
|
||||
keys.push_back(model_tensor_name.substr(0, model_tensor_name.size() - 5)); // remove .bias
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& key : keys) {
|
||||
bool is_qkv_split = starts_with(key, "SPLIT|");
|
||||
|
|
@ -265,8 +289,22 @@ struct LoraModel : public GGMLRunner {
|
|||
}
|
||||
struct ggml_tensor* updown = NULL;
|
||||
float scale_value = 1.0f;
|
||||
std::string fk = lora_pre[type] + key;
|
||||
if (lora_tensors.find(fk + ".hada_w1_a") != lora_tensors.end()) {
|
||||
std::string full_key = lora_pre[type] + key;
|
||||
if (is_bias) {
|
||||
if (lora_tensors.find(full_key + ".diff_b") != lora_tensors.end()) {
|
||||
std::string diff_name = full_key + ".diff_b";
|
||||
ggml_tensor* diff = lora_tensors[diff_name];
|
||||
updown = to_f32(compute_ctx, diff);
|
||||
applied_lora_tensors.insert(diff_name);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else if (lora_tensors.find(full_key + ".diff") != lora_tensors.end()) {
|
||||
std::string diff_name = full_key + ".diff";
|
||||
ggml_tensor* diff = lora_tensors[diff_name];
|
||||
updown = to_f32(compute_ctx, diff);
|
||||
applied_lora_tensors.insert(diff_name);
|
||||
} else if (lora_tensors.find(full_key + ".hada_w1_a") != lora_tensors.end()) {
|
||||
// LoHa mode
|
||||
|
||||
// TODO: split qkv convention for LoHas (is it ever used?)
|
||||
|
|
@ -292,9 +330,9 @@ struct LoraModel : public GGMLRunner {
|
|||
std::string hada_2_down_name = "";
|
||||
std::string hada_2_up_name = "";
|
||||
|
||||
hada_1_down_name = fk + ".hada_w1_b";
|
||||
hada_1_up_name = fk + ".hada_w1_a";
|
||||
hada_1_mid_name = fk + ".hada_t1";
|
||||
hada_1_down_name = full_key + ".hada_w1_b";
|
||||
hada_1_up_name = full_key + ".hada_w1_a";
|
||||
hada_1_mid_name = full_key + ".hada_t1";
|
||||
if (lora_tensors.find(hada_1_down_name) != lora_tensors.end()) {
|
||||
hada_1_down = to_f32(compute_ctx, lora_tensors[hada_1_down_name]);
|
||||
}
|
||||
|
|
@ -307,9 +345,9 @@ struct LoraModel : public GGMLRunner {
|
|||
hada_1_up = ggml_cont(compute_ctx, ggml_transpose(compute_ctx, hada_1_up));
|
||||
}
|
||||
|
||||
hada_2_down_name = fk + ".hada_w2_b";
|
||||
hada_2_up_name = fk + ".hada_w2_a";
|
||||
hada_2_mid_name = fk + ".hada_t2";
|
||||
hada_2_down_name = full_key + ".hada_w2_b";
|
||||
hada_2_up_name = full_key + ".hada_w2_a";
|
||||
hada_2_mid_name = full_key + ".hada_t2";
|
||||
if (lora_tensors.find(hada_2_down_name) != lora_tensors.end()) {
|
||||
hada_2_down = to_f32(compute_ctx, lora_tensors[hada_2_down_name]);
|
||||
}
|
||||
|
|
@ -322,7 +360,7 @@ struct LoraModel : public GGMLRunner {
|
|||
hada_2_up = ggml_cont(compute_ctx, ggml_transpose(compute_ctx, hada_2_up));
|
||||
}
|
||||
|
||||
alpha_name = fk + ".alpha";
|
||||
alpha_name = full_key + ".alpha";
|
||||
|
||||
applied_lora_tensors.insert(hada_1_down_name);
|
||||
applied_lora_tensors.insert(hada_1_up_name);
|
||||
|
|
@ -345,7 +383,7 @@ struct LoraModel : public GGMLRunner {
|
|||
float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]);
|
||||
scale_value = alpha / rank;
|
||||
}
|
||||
} else if (lora_tensors.find(fk + ".lokr_w1") != lora_tensors.end() || lora_tensors.find(fk + ".lokr_w1_a") != lora_tensors.end()) {
|
||||
} else if (lora_tensors.find(full_key + ".lokr_w1") != lora_tensors.end() || lora_tensors.find(full_key + ".lokr_w1_a") != lora_tensors.end()) {
|
||||
// LoKr mode
|
||||
|
||||
// TODO: split qkv convention for LoKrs (is it ever used?)
|
||||
|
|
@ -354,7 +392,7 @@ struct LoraModel : public GGMLRunner {
|
|||
break;
|
||||
}
|
||||
|
||||
std::string alpha_name = fk + ".alpha";
|
||||
std::string alpha_name = full_key + ".alpha";
|
||||
|
||||
ggml_tensor* lokr_w1 = NULL;
|
||||
ggml_tensor* lokr_w2 = NULL;
|
||||
|
|
@ -362,8 +400,8 @@ struct LoraModel : public GGMLRunner {
|
|||
std::string lokr_w1_name = "";
|
||||
std::string lokr_w2_name = "";
|
||||
|
||||
lokr_w1_name = fk + ".lokr_w1";
|
||||
lokr_w2_name = fk + ".lokr_w2";
|
||||
lokr_w1_name = full_key + ".lokr_w1";
|
||||
lokr_w2_name = full_key + ".lokr_w2";
|
||||
|
||||
if (lora_tensors.find(lokr_w1_name) != lora_tensors.end()) {
|
||||
lokr_w1 = to_f32(compute_ctx, lora_tensors[lokr_w1_name]);
|
||||
|
|
@ -435,29 +473,29 @@ struct LoraModel : public GGMLRunner {
|
|||
|
||||
if (is_qkv_split) {
|
||||
std::string suffix = "";
|
||||
auto split_q_d_name = fk + "q" + suffix + lora_downs[type] + ".weight";
|
||||
auto split_q_d_name = full_key + "q" + suffix + lora_downs[type] + ".weight";
|
||||
|
||||
if (lora_tensors.find(split_q_d_name) == lora_tensors.end()) {
|
||||
suffix = "_proj";
|
||||
split_q_d_name = fk + "q" + suffix + lora_downs[type] + ".weight";
|
||||
split_q_d_name = full_key + "q" + suffix + lora_downs[type] + ".weight";
|
||||
}
|
||||
if (lora_tensors.find(split_q_d_name) != lora_tensors.end()) {
|
||||
// print_ggml_tensor(it.second, true); //[3072, 21504, 1, 1]
|
||||
// find qkv and mlp up parts in LoRA model
|
||||
auto split_k_d_name = fk + "k" + suffix + lora_downs[type] + ".weight";
|
||||
auto split_v_d_name = fk + "v" + suffix + lora_downs[type] + ".weight";
|
||||
auto split_k_d_name = full_key + "k" + suffix + lora_downs[type] + ".weight";
|
||||
auto split_v_d_name = full_key + "v" + suffix + lora_downs[type] + ".weight";
|
||||
|
||||
auto split_q_u_name = fk + "q" + suffix + lora_ups[type] + ".weight";
|
||||
auto split_k_u_name = fk + "k" + suffix + lora_ups[type] + ".weight";
|
||||
auto split_v_u_name = fk + "v" + suffix + lora_ups[type] + ".weight";
|
||||
auto split_q_u_name = full_key + "q" + suffix + lora_ups[type] + ".weight";
|
||||
auto split_k_u_name = full_key + "k" + suffix + lora_ups[type] + ".weight";
|
||||
auto split_v_u_name = full_key + "v" + suffix + lora_ups[type] + ".weight";
|
||||
|
||||
auto split_q_scale_name = fk + "q" + suffix + ".scale";
|
||||
auto split_k_scale_name = fk + "k" + suffix + ".scale";
|
||||
auto split_v_scale_name = fk + "v" + suffix + ".scale";
|
||||
auto split_q_scale_name = full_key + "q" + suffix + ".scale";
|
||||
auto split_k_scale_name = full_key + "k" + suffix + ".scale";
|
||||
auto split_v_scale_name = full_key + "v" + suffix + ".scale";
|
||||
|
||||
auto split_q_alpha_name = fk + "q" + suffix + ".alpha";
|
||||
auto split_k_alpha_name = fk + "k" + suffix + ".alpha";
|
||||
auto split_v_alpha_name = fk + "v" + suffix + ".alpha";
|
||||
auto split_q_alpha_name = full_key + "q" + suffix + ".alpha";
|
||||
auto split_k_alpha_name = full_key + "k" + suffix + ".alpha";
|
||||
auto split_v_alpha_name = full_key + "v" + suffix + ".alpha";
|
||||
|
||||
ggml_tensor* lora_q_down = NULL;
|
||||
ggml_tensor* lora_q_up = NULL;
|
||||
|
|
@ -571,29 +609,29 @@ struct LoraModel : public GGMLRunner {
|
|||
applied_lora_tensors.insert(split_v_d_name);
|
||||
}
|
||||
} else if (is_qkvm_split) {
|
||||
auto split_q_d_name = fk + "attn.to_q" + lora_downs[type] + ".weight";
|
||||
auto split_q_d_name = full_key + "attn.to_q" + lora_downs[type] + ".weight";
|
||||
if (lora_tensors.find(split_q_d_name) != lora_tensors.end()) {
|
||||
// print_ggml_tensor(it.second, true); //[3072, 21504, 1, 1]
|
||||
// find qkv and mlp up parts in LoRA model
|
||||
auto split_k_d_name = fk + "attn.to_k" + lora_downs[type] + ".weight";
|
||||
auto split_v_d_name = fk + "attn.to_v" + lora_downs[type] + ".weight";
|
||||
auto split_k_d_name = full_key + "attn.to_k" + lora_downs[type] + ".weight";
|
||||
auto split_v_d_name = full_key + "attn.to_v" + lora_downs[type] + ".weight";
|
||||
|
||||
auto split_q_u_name = fk + "attn.to_q" + lora_ups[type] + ".weight";
|
||||
auto split_k_u_name = fk + "attn.to_k" + lora_ups[type] + ".weight";
|
||||
auto split_v_u_name = fk + "attn.to_v" + lora_ups[type] + ".weight";
|
||||
auto split_q_u_name = full_key + "attn.to_q" + lora_ups[type] + ".weight";
|
||||
auto split_k_u_name = full_key + "attn.to_k" + lora_ups[type] + ".weight";
|
||||
auto split_v_u_name = full_key + "attn.to_v" + lora_ups[type] + ".weight";
|
||||
|
||||
auto split_m_d_name = fk + "proj_mlp" + lora_downs[type] + ".weight";
|
||||
auto split_m_u_name = fk + "proj_mlp" + lora_ups[type] + ".weight";
|
||||
auto split_m_d_name = full_key + "proj_mlp" + lora_downs[type] + ".weight";
|
||||
auto split_m_u_name = full_key + "proj_mlp" + lora_ups[type] + ".weight";
|
||||
|
||||
auto split_q_scale_name = fk + "attn.to_q" + ".scale";
|
||||
auto split_k_scale_name = fk + "attn.to_k" + ".scale";
|
||||
auto split_v_scale_name = fk + "attn.to_v" + ".scale";
|
||||
auto split_m_scale_name = fk + "proj_mlp" + ".scale";
|
||||
auto split_q_scale_name = full_key + "attn.to_q" + ".scale";
|
||||
auto split_k_scale_name = full_key + "attn.to_k" + ".scale";
|
||||
auto split_v_scale_name = full_key + "attn.to_v" + ".scale";
|
||||
auto split_m_scale_name = full_key + "proj_mlp" + ".scale";
|
||||
|
||||
auto split_q_alpha_name = fk + "attn.to_q" + ".alpha";
|
||||
auto split_k_alpha_name = fk + "attn.to_k" + ".alpha";
|
||||
auto split_v_alpha_name = fk + "attn.to_v" + ".alpha";
|
||||
auto split_m_alpha_name = fk + "proj_mlp" + ".alpha";
|
||||
auto split_q_alpha_name = full_key + "attn.to_q" + ".alpha";
|
||||
auto split_k_alpha_name = full_key + "attn.to_k" + ".alpha";
|
||||
auto split_v_alpha_name = full_key + "attn.to_v" + ".alpha";
|
||||
auto split_m_alpha_name = full_key + "proj_mlp" + ".alpha";
|
||||
|
||||
ggml_tensor* lora_q_down = NULL;
|
||||
ggml_tensor* lora_q_up = NULL;
|
||||
|
|
@ -748,30 +786,27 @@ struct LoraModel : public GGMLRunner {
|
|||
applied_lora_tensors.insert(split_m_d_name);
|
||||
}
|
||||
} else {
|
||||
lora_up_name = fk + lora_ups[type] + ".weight";
|
||||
lora_down_name = fk + lora_downs[type] + ".weight";
|
||||
lora_mid_name = fk + ".lora_mid.weight";
|
||||
lora_up_name = full_key + lora_ups[type] + ".weight";
|
||||
lora_down_name = full_key + lora_downs[type] + ".weight";
|
||||
lora_mid_name = full_key + ".lora_mid.weight";
|
||||
|
||||
alpha_name = fk + ".alpha";
|
||||
scale_name = fk + ".scale";
|
||||
alpha_name = full_key + ".alpha";
|
||||
scale_name = full_key + ".scale";
|
||||
|
||||
if (lora_tensors.find(lora_up_name) != lora_tensors.end()) {
|
||||
lora_up = to_f32(compute_ctx, lora_tensors[lora_up_name]);
|
||||
applied_lora_tensors.insert(lora_up_name);
|
||||
}
|
||||
|
||||
if (lora_tensors.find(lora_down_name) != lora_tensors.end()) {
|
||||
lora_down = to_f32(compute_ctx, lora_tensors[lora_down_name]);
|
||||
applied_lora_tensors.insert(lora_down_name);
|
||||
}
|
||||
|
||||
if (lora_tensors.find(lora_mid_name) != lora_tensors.end()) {
|
||||
lora_mid = to_f32(compute_ctx, lora_tensors[lora_mid_name]);
|
||||
applied_lora_tensors.insert(lora_mid_name);
|
||||
}
|
||||
|
||||
applied_lora_tensors.insert(lora_up_name);
|
||||
applied_lora_tensors.insert(lora_down_name);
|
||||
applied_lora_tensors.insert(alpha_name);
|
||||
applied_lora_tensors.insert(scale_name);
|
||||
}
|
||||
|
||||
if (lora_up == NULL || lora_down == NULL) {
|
||||
|
|
@ -782,29 +817,37 @@ struct LoraModel : public GGMLRunner {
|
|||
int64_t rank = lora_down->ne[ggml_n_dims(lora_down) - 1];
|
||||
if (lora_tensors.find(scale_name) != lora_tensors.end()) {
|
||||
scale_value = ggml_backend_tensor_get_f32(lora_tensors[scale_name]);
|
||||
applied_lora_tensors.insert(scale_name);
|
||||
} else if (lora_tensors.find(alpha_name) != lora_tensors.end()) {
|
||||
float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]);
|
||||
scale_value = alpha / rank;
|
||||
// LOG_DEBUG("rank %s %ld %.2f %.2f", alpha_name.c_str(), rank, alpha, scale_value);
|
||||
applied_lora_tensors.insert(alpha_name);
|
||||
}
|
||||
|
||||
updown = ggml_merge_lora(compute_ctx, lora_down, lora_up, lora_mid);
|
||||
}
|
||||
scale_value *= multiplier;
|
||||
updown = ggml_reshape(compute_ctx, updown, weight);
|
||||
GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(weight));
|
||||
updown = ggml_scale_inplace(compute_ctx, updown, scale_value);
|
||||
ggml_tensor* final_weight;
|
||||
if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
|
||||
// final_weight = ggml_new_tensor(compute_ctx, GGML_TYPE_F32, ggml_n_dims(weight), weight->ne);
|
||||
// final_weight = ggml_cpy(compute_ctx, weight, final_weight);
|
||||
final_weight = to_f32(compute_ctx, weight);
|
||||
final_weight = ggml_add_inplace(compute_ctx, final_weight, updown);
|
||||
final_weight = ggml_cpy(compute_ctx, final_weight, weight);
|
||||
} else {
|
||||
final_weight = ggml_add_inplace(compute_ctx, weight, updown);
|
||||
ggml_tensor* original_tensor = model_tensor;
|
||||
if (!ggml_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
|
||||
model_tensor = ggml_dup_tensor(compute_ctx, model_tensor);
|
||||
set_backend_tensor_data(model_tensor, original_tensor->data);
|
||||
}
|
||||
updown = ggml_reshape(compute_ctx, updown, model_tensor);
|
||||
GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(model_tensor));
|
||||
updown = ggml_scale_inplace(compute_ctx, updown, scale_value);
|
||||
ggml_tensor* final_tensor;
|
||||
if (model_tensor->type != GGML_TYPE_F32 && model_tensor->type != GGML_TYPE_F16) {
|
||||
final_tensor = to_f32(compute_ctx, model_tensor);
|
||||
final_tensor = ggml_add_inplace(compute_ctx, final_tensor, updown);
|
||||
final_tensor = ggml_cpy(compute_ctx, final_tensor, model_tensor);
|
||||
} else {
|
||||
final_tensor = ggml_add_inplace(compute_ctx, model_tensor, updown);
|
||||
}
|
||||
ggml_build_forward_expand(gf, final_tensor);
|
||||
if (!ggml_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
|
||||
original_tensor_to_final_tensor[original_tensor] = final_tensor;
|
||||
}
|
||||
// final_weight = ggml_add_inplace(compute_ctx, weight, updown); // apply directly
|
||||
ggml_build_forward_expand(gf, final_weight);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -825,10 +868,10 @@ struct LoraModel : public GGMLRunner {
|
|||
* this function is called once to calculate the required buffer size
|
||||
* and then again to actually generate a graph to be used */
|
||||
if (applied_lora_tensors_count != total_lora_tensors_count) {
|
||||
LOG_WARN("Only (%lu / %lu) LoRA tensors have been applied",
|
||||
LOG_WARN("Only (%lu / %lu) LoRA tensors will be applied",
|
||||
applied_lora_tensors_count, total_lora_tensors_count);
|
||||
} else {
|
||||
LOG_DEBUG("(%lu / %lu) LoRA tensors applied successfully",
|
||||
LOG_DEBUG("(%lu / %lu) LoRA tensors will be applied",
|
||||
applied_lora_tensors_count, total_lora_tensors_count);
|
||||
}
|
||||
|
||||
|
|
@ -839,7 +882,15 @@ struct LoraModel : public GGMLRunner {
|
|||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_lora_graph(model_tensors, version);
|
||||
};
|
||||
GGMLRunner::compute(get_graph, n_threads, true);
|
||||
GGMLRunner::compute(get_graph, n_threads, false);
|
||||
for (auto item : original_tensor_to_final_tensor) {
|
||||
ggml_tensor* original_tensor = item.first;
|
||||
ggml_tensor* final_tensor = item.second;
|
||||
|
||||
ggml_backend_tensor_copy(final_tensor, original_tensor);
|
||||
}
|
||||
original_tensor_to_final_tensor.clear();
|
||||
GGMLRunner::free_compute_buffer();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
74
otherarch/sdcpp/ltxv.hpp
Normal file
74
otherarch/sdcpp/ltxv.hpp
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
#ifndef __LTXV_HPP__
|
||||
#define __LTXV_HPP__
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ggml_extend.hpp"
|
||||
|
||||
namespace LTXV {
|
||||
|
||||
class CausalConv3d : public GGMLBlock {
|
||||
protected:
|
||||
int time_kernel_size;
|
||||
|
||||
public:
|
||||
CausalConv3d(int64_t in_channels,
|
||||
int64_t out_channels,
|
||||
int kernel_size = 3,
|
||||
std::tuple<int> stride = {1, 1, 1},
|
||||
int dilation = 1,
|
||||
bool bias = true) {
|
||||
time_kernel_size = kernel_size / 2;
|
||||
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv3d(in_channels,
|
||||
out_channels,
|
||||
{kernel_size, kernel_size, kernel_size},
|
||||
stride,
|
||||
{0, kernel_size / 2, kernel_size / 2},
|
||||
{dilation, 1, 1},
|
||||
bias));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x,
|
||||
bool causal = true) {
|
||||
// x: [N*IC, ID, IH, IW]
|
||||
// result: [N*OC, OD, OH, OW]
|
||||
auto conv = std::dynamic_pointer_cast<Conv3d>(blocks["conv"]);
|
||||
if (causal) {
|
||||
auto h = ggml_cont(ctx, ggml_permute(ctx, x, 0, 1, 3, 2)); // [ID, N*IC, IH, IW]
|
||||
auto first_frame = ggml_view_3d(ctx, h, h->ne[0], h->ne[1], h->ne[2], h->nb[1], h->nb[2], 0); // [N*IC, IH, IW]
|
||||
first_frame = ggml_reshape_4d(ctx, first_frame, first_frame->ne[0], first_frame->ne[1], 1, first_frame->ne[2]); // [N*IC, 1, IH, IW]
|
||||
auto first_frame_pad = first_frame;
|
||||
for (int i = 1; i < time_kernel_size - 1; i++) {
|
||||
first_frame_pad = ggml_concat(ctx, first_frame_pad, first_frame, 2);
|
||||
}
|
||||
x = ggml_concat(ctx, first_frame_pad, x, 2);
|
||||
} else {
|
||||
auto h = ggml_cont(ctx, ggml_permute(ctx, x, 0, 1, 3, 2)); // [ID, N*IC, IH, IW]
|
||||
int64_t offset = h->nb[2] * h->ne[2];
|
||||
|
||||
auto first_frame = ggml_view_3d(ctx, h, h->ne[0], h->ne[1], h->ne[2], h->nb[1], h->nb[2], 0); // [N*IC, IH, IW]
|
||||
first_frame = ggml_reshape_4d(ctx, first_frame, first_frame->ne[0], first_frame->ne[1], 1, first_frame->ne[2]); // [N*IC, 1, IH, IW]
|
||||
auto first_frame_pad = first_frame;
|
||||
for (int i = 1; i < (time_kernel_size - 1) / 2; i++) {
|
||||
first_frame_pad = ggml_concat(ctx, first_frame_pad, first_frame, 2);
|
||||
}
|
||||
|
||||
auto last_frame = ggml_view_3d(ctx, h, h->ne[0], h->ne[1], h->ne[2], h->nb[1], h->nb[2], offset * (h->ne[3] - 1)); // [N*IC, IH, IW]
|
||||
last_frame = ggml_reshape_4d(ctx, last_frame, last_frame->ne[0], last_frame->ne[1], 1, last_frame->ne[2]); // [N*IC, 1, IH, IW]
|
||||
auto last_frame_pad = last_frame;
|
||||
for (int i = 1; i < (time_kernel_size - 1) / 2; i++) {
|
||||
last_frame_pad = ggml_concat(ctx, last_frame_pad, last_frame, 2);
|
||||
}
|
||||
|
||||
x = ggml_concat(ctx, first_frame_pad, x, 2);
|
||||
x = ggml_concat(ctx, x, last_frame_pad, 2);
|
||||
}
|
||||
|
||||
x = conv->forward(ctx, x);
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -142,43 +142,21 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class RMSNorm : public UnaryBlock {
|
||||
protected:
|
||||
int64_t hidden_size;
|
||||
float eps;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
|
||||
enum ggml_type wtype = GGML_TYPE_F32;
|
||||
params["weight"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
|
||||
}
|
||||
|
||||
public:
|
||||
RMSNorm(int64_t hidden_size,
|
||||
float eps = 1e-06f)
|
||||
: hidden_size(hidden_size),
|
||||
eps(eps) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
x = ggml_rms_norm(ctx, x, eps);
|
||||
x = ggml_mul(ctx, x, w);
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
class SelfAttention : public GGMLBlock {
|
||||
public:
|
||||
int64_t num_heads;
|
||||
bool pre_only;
|
||||
std::string qk_norm;
|
||||
bool flash_attn;
|
||||
|
||||
public:
|
||||
SelfAttention(int64_t dim,
|
||||
int64_t num_heads = 8,
|
||||
std::string qk_norm = "",
|
||||
bool qkv_bias = false,
|
||||
bool pre_only = false)
|
||||
: num_heads(num_heads), pre_only(pre_only), qk_norm(qk_norm) {
|
||||
bool pre_only = false,
|
||||
bool flash_attn = false)
|
||||
: num_heads(num_heads), pre_only(pre_only), qk_norm(qk_norm), flash_attn(flash_attn) {
|
||||
int64_t d_head = dim / num_heads;
|
||||
blocks["qkv"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim * 3, qkv_bias));
|
||||
if (!pre_only) {
|
||||
|
|
@ -226,10 +204,12 @@ public:
|
|||
}
|
||||
|
||||
// x: [N, n_token, dim]
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x) {
|
||||
auto qkv = pre_attention(ctx, x);
|
||||
x = ggml_nn_attention_ext(ctx, qkv[0], qkv[1], qkv[2], num_heads); // [N, n_token, dim]
|
||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, true); // [N, n_token, dim]
|
||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
|
@ -254,6 +234,7 @@ public:
|
|||
int64_t num_heads;
|
||||
bool pre_only;
|
||||
bool self_attn;
|
||||
bool flash_attn;
|
||||
|
||||
public:
|
||||
DismantledBlock(int64_t hidden_size,
|
||||
|
|
@ -262,16 +243,17 @@ public:
|
|||
std::string qk_norm = "",
|
||||
bool qkv_bias = false,
|
||||
bool pre_only = false,
|
||||
bool self_attn = false)
|
||||
bool self_attn = false,
|
||||
bool flash_attn = false)
|
||||
: num_heads(num_heads), pre_only(pre_only), self_attn(self_attn) {
|
||||
// rmsnorm is always Flase
|
||||
// scale_mod_only is always Flase
|
||||
// swiglu is always Flase
|
||||
blocks["norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
|
||||
blocks["attn"] = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, pre_only));
|
||||
blocks["attn"] = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, pre_only, flash_attn));
|
||||
|
||||
if (self_attn) {
|
||||
blocks["attn2"] = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, false));
|
||||
blocks["attn2"] = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, false, flash_attn));
|
||||
}
|
||||
|
||||
if (!pre_only) {
|
||||
|
|
@ -439,7 +421,10 @@ public:
|
|||
return x;
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* c) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* c) {
|
||||
// x: [N, n_token, hidden_size]
|
||||
// c: [N, hidden_size]
|
||||
// return: [N, n_token, hidden_size]
|
||||
|
|
@ -454,8 +439,8 @@ public:
|
|||
auto qkv2 = std::get<1>(qkv_intermediates);
|
||||
auto intermediates = std::get<2>(qkv_intermediates);
|
||||
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, qkv[0], qkv[1], qkv[2], num_heads); // [N, n_token, dim]
|
||||
auto attn2_out = ggml_nn_attention_ext(ctx, qkv2[0], qkv2[1], qkv2[2], num_heads); // [N, n_token, dim]
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
auto attn2_out = ggml_nn_attention_ext(ctx, backend, qkv2[0], qkv2[1], qkv2[2], num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
x = post_attention_x(ctx,
|
||||
attn_out,
|
||||
attn2_out,
|
||||
|
|
@ -471,7 +456,7 @@ public:
|
|||
auto qkv = qkv_intermediates.first;
|
||||
auto intermediates = qkv_intermediates.second;
|
||||
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, qkv[0], qkv[1], qkv[2], num_heads); // [N, n_token, dim]
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
x = post_attention(ctx,
|
||||
attn_out,
|
||||
intermediates[0],
|
||||
|
|
@ -486,6 +471,8 @@ public:
|
|||
|
||||
__STATIC_INLINE__ std::pair<struct ggml_tensor*, struct ggml_tensor*>
|
||||
block_mixing(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
bool flash_attn,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* c,
|
||||
|
|
@ -515,8 +502,8 @@ block_mixing(struct ggml_context* ctx,
|
|||
qkv.push_back(ggml_concat(ctx, context_qkv[i], x_qkv[i], 1));
|
||||
}
|
||||
|
||||
auto attn = ggml_nn_attention_ext(ctx, qkv[0], qkv[1], qkv[2], x_block->num_heads); // [N, n_context + n_token, hidden_size]
|
||||
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_context + n_token, N, hidden_size]
|
||||
auto attn = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], x_block->num_heads, NULL, false, false, flash_attn); // [N, n_context + n_token, hidden_size]
|
||||
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_context + n_token, N, hidden_size]
|
||||
auto context_attn = ggml_view_3d(ctx,
|
||||
attn,
|
||||
attn->ne[0],
|
||||
|
|
@ -549,7 +536,7 @@ block_mixing(struct ggml_context* ctx,
|
|||
}
|
||||
|
||||
if (x_block->self_attn) {
|
||||
auto attn2 = ggml_nn_attention_ext(ctx, x_qkv2[0], x_qkv2[1], x_qkv2[2], x_block->num_heads); // [N, n_token, hidden_size]
|
||||
auto attn2 = ggml_nn_attention_ext(ctx, backend, x_qkv2[0], x_qkv2[1], x_qkv2[2], x_block->num_heads); // [N, n_token, hidden_size]
|
||||
|
||||
x = x_block->post_attention_x(ctx,
|
||||
x_attn,
|
||||
|
|
@ -574,6 +561,8 @@ block_mixing(struct ggml_context* ctx,
|
|||
}
|
||||
|
||||
struct JointBlock : public GGMLBlock {
|
||||
bool flash_attn;
|
||||
|
||||
public:
|
||||
JointBlock(int64_t hidden_size,
|
||||
int64_t num_heads,
|
||||
|
|
@ -581,19 +570,22 @@ public:
|
|||
std::string qk_norm = "",
|
||||
bool qkv_bias = false,
|
||||
bool pre_only = false,
|
||||
bool self_attn_x = false) {
|
||||
blocks["context_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, pre_only));
|
||||
blocks["x_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, false, self_attn_x));
|
||||
bool self_attn_x = false,
|
||||
bool flash_attn = false)
|
||||
: flash_attn(flash_attn) {
|
||||
blocks["context_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, pre_only, false, flash_attn));
|
||||
blocks["x_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, false, self_attn_x, flash_attn));
|
||||
}
|
||||
|
||||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* c) {
|
||||
auto context_block = std::dynamic_pointer_cast<DismantledBlock>(blocks["context_block"]);
|
||||
auto x_block = std::dynamic_pointer_cast<DismantledBlock>(blocks["x_block"]);
|
||||
|
||||
return block_mixing(ctx, context, x, c, context_block, x_block);
|
||||
return block_mixing(ctx, backend, flash_attn, context, x, c, context_block, x_block);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -651,6 +643,7 @@ protected:
|
|||
int64_t context_embedder_out_dim = 1536;
|
||||
int64_t hidden_size;
|
||||
std::string qk_norm;
|
||||
bool flash_attn = false;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
|
||||
enum ggml_type wtype = GGML_TYPE_F32;
|
||||
|
|
@ -658,7 +651,8 @@ protected:
|
|||
}
|
||||
|
||||
public:
|
||||
MMDiT(const String2GGMLType& tensor_types = {}) {
|
||||
MMDiT(bool flash_attn = false, const String2GGMLType& tensor_types = {})
|
||||
: flash_attn(flash_attn) {
|
||||
// input_size is always None
|
||||
// learn_sigma is always False
|
||||
// register_length is alwalys 0
|
||||
|
|
@ -726,7 +720,8 @@ public:
|
|||
qk_norm,
|
||||
true,
|
||||
i == depth - 1,
|
||||
i <= d_self));
|
||||
i <= d_self,
|
||||
flash_attn));
|
||||
}
|
||||
|
||||
blocks["final_layer"] = std::shared_ptr<GGMLBlock>(new FinalLayer(hidden_size, patch_size, out_channels));
|
||||
|
|
@ -795,6 +790,7 @@ public:
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward_core_with_concat(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* c_mod,
|
||||
struct ggml_tensor* context,
|
||||
|
|
@ -813,7 +809,7 @@ public:
|
|||
|
||||
auto block = std::dynamic_pointer_cast<JointBlock>(blocks["joint_blocks." + std::to_string(i)]);
|
||||
|
||||
auto context_x = block->forward(ctx, context, x, c_mod);
|
||||
auto context_x = block->forward(ctx, backend, context, x, c_mod);
|
||||
context = context_x.first;
|
||||
x = context_x.second;
|
||||
}
|
||||
|
|
@ -824,6 +820,7 @@ public:
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* t,
|
||||
struct ggml_tensor* y = NULL,
|
||||
|
|
@ -859,7 +856,7 @@ public:
|
|||
context = context_embedder->forward(ctx, context); // [N, L, D] aka [N, L, 1536]
|
||||
}
|
||||
|
||||
x = forward_core_with_concat(ctx, x, c, context, skip_layers); // (N, H*W, patch_size ** 2 * out_channels)
|
||||
x = forward_core_with_concat(ctx, backend, x, c, context, skip_layers); // (N, H*W, patch_size ** 2 * out_channels)
|
||||
|
||||
x = unpatchify(ctx, x, h, w); // [N, C, H, W]
|
||||
|
||||
|
|
@ -870,9 +867,11 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
MMDiT mmdit;
|
||||
|
||||
MMDiTRunner(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
bool flash_attn,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
const std::string prefix = "")
|
||||
: GGMLRunner(backend), mmdit(tensor_types) {
|
||||
: GGMLRunner(backend, offload_params_to_cpu), mmdit(flash_attn, tensor_types) {
|
||||
mmdit.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -897,6 +896,7 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
timesteps = to_backend(timesteps);
|
||||
|
||||
struct ggml_tensor* out = mmdit.forward(compute_ctx,
|
||||
runtime_backend,
|
||||
x,
|
||||
timesteps,
|
||||
y,
|
||||
|
|
@ -970,7 +970,7 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<MMDiTRunner> mmdit = std::shared_ptr<MMDiTRunner>(new MMDiTRunner(backend));
|
||||
std::shared_ptr<MMDiTRunner> mmdit = std::shared_ptr<MMDiTRunner>(new MMDiTRunner(backend, false, false));
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
|
|
@ -984,7 +984,7 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
return;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(tensors, backend);
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
|
|
|
|||
|
|
@ -1,16 +1,24 @@
|
|||
#include <stdarg.h>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <regex>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <filesystem>
|
||||
|
||||
#include "gguf_reader.hpp"
|
||||
#include "model.h"
|
||||
#include "stable-diffusion.h"
|
||||
#include "util.h"
|
||||
#include "vocab.hpp"
|
||||
#include "vocab_umt5.hpp"
|
||||
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
|
|
@ -102,6 +110,7 @@ const char* unused_tensors[] = {
|
|||
"posterior_mean_coef1",
|
||||
"posterior_mean_coef2",
|
||||
"cond_stage_model.transformer.text_model.embeddings.position_ids",
|
||||
"cond_stage_model.transformer.vision_model.embeddings.position_ids",
|
||||
"cond_stage_model.model.logit_scale",
|
||||
"cond_stage_model.model.text_projection",
|
||||
"conditioner.embedders.0.transformer.text_model.embeddings.position_ids",
|
||||
|
|
@ -118,7 +127,7 @@ const char* unused_tensors[] = {
|
|||
};
|
||||
|
||||
bool is_unused_tensor(std::string name) {
|
||||
for (int i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) {
|
||||
for (size_t i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) {
|
||||
if (starts_with(name, unused_tensors[i])) {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -155,6 +164,11 @@ std::unordered_map<std::string, std::string> open_clip_to_hk_clip_resblock = {
|
|||
{"mlp.c_proj.weight", "mlp.fc2.weight"},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::string> cond_model_name_map = {
|
||||
{"transformer.vision_model.pre_layrnorm.weight", "transformer.vision_model.pre_layernorm.weight"},
|
||||
{"transformer.vision_model.pre_layrnorm.bias", "transformer.vision_model.pre_layernorm.bias"},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::string> vae_decoder_name_map = {
|
||||
{"first_stage_model.decoder.mid.attn_1.to_k.bias", "first_stage_model.decoder.mid.attn_1.k.bias"},
|
||||
{"first_stage_model.decoder.mid.attn_1.to_k.weight", "first_stage_model.decoder.mid.attn_1.k.weight"},
|
||||
|
|
@ -193,7 +207,7 @@ std::unordered_map<std::string, std::string> pmid_v2_name_map = {
|
|||
"pmid.qformer_perceiver.token_proj.fc2.weight"},
|
||||
};
|
||||
|
||||
std::string convert_open_clip_to_hf_clip(const std::string& name) {
|
||||
std::string convert_cond_model_name(const std::string& name) {
|
||||
std::string new_name = name;
|
||||
std::string prefix;
|
||||
if (contains(new_name, ".enc.")) {
|
||||
|
|
@ -282,6 +296,10 @@ std::string convert_open_clip_to_hf_clip(const std::string& name) {
|
|||
new_name = open_clip_to_hf_clip_model[new_name];
|
||||
}
|
||||
|
||||
if (cond_model_name_map.find(new_name) != cond_model_name_map.end()) {
|
||||
new_name = cond_model_name_map[new_name];
|
||||
}
|
||||
|
||||
std::string open_clip_resblock_prefix = "model.transformer.resblocks.";
|
||||
std::string hf_clip_resblock_prefix = "transformer.text_model.encoder.layers.";
|
||||
|
||||
|
|
@ -577,7 +595,7 @@ std::string convert_tensor_name(std::string name) {
|
|||
// }
|
||||
std::string new_name = name;
|
||||
if (starts_with(name, "cond_stage_model.") || starts_with(name, "conditioner.embedders.") || starts_with(name, "text_encoders.") || ends_with(name, ".vision_model.visual_projection.weight")) {
|
||||
new_name = convert_open_clip_to_hf_clip(name);
|
||||
new_name = convert_cond_model_name(name);
|
||||
} else if (starts_with(name, "first_stage_model.decoder")) {
|
||||
new_name = convert_vae_decoder_name(name);
|
||||
} else if (starts_with(name, "pmid.qformer_perceiver")) {
|
||||
|
|
@ -606,9 +624,11 @@ std::string convert_tensor_name(std::string name) {
|
|||
} else {
|
||||
new_name = name;
|
||||
}
|
||||
} else if (ends_with(name, ".diff") || ends_with(name, ".diff_b")) {
|
||||
new_name = "lora." + name;
|
||||
} else if (contains(name, "lora_up") || contains(name, "lora_down") ||
|
||||
contains(name, "lora.up") || contains(name, "lora.down") ||
|
||||
contains(name, "lora_linear")) {
|
||||
contains(name, "lora_linear") || ends_with(name, ".alpha")) {
|
||||
size_t pos = new_name.find(".processor");
|
||||
if (pos != std::string::npos) {
|
||||
new_name.replace(pos, strlen(".processor"), "");
|
||||
|
|
@ -616,7 +636,11 @@ std::string convert_tensor_name(std::string name) {
|
|||
// if (starts_with(new_name, "transformer.transformer_blocks") || starts_with(new_name, "transformer.single_transformer_blocks")) {
|
||||
// new_name = "model.diffusion_model." + new_name;
|
||||
// }
|
||||
pos = new_name.rfind("lora");
|
||||
if (ends_with(name, ".alpha")) {
|
||||
pos = new_name.rfind("alpha");
|
||||
} else {
|
||||
pos = new_name.rfind("lora");
|
||||
}
|
||||
if (pos != std::string::npos) {
|
||||
std::string name_without_network_parts = new_name.substr(0, pos - 1);
|
||||
std::string network_part = new_name.substr(pos);
|
||||
|
|
@ -698,6 +722,13 @@ void preprocess_tensor(TensorStorage tensor_storage,
|
|||
tensor_storage.unsqueeze();
|
||||
}
|
||||
|
||||
// wan vae
|
||||
if (ends_with(new_name, "gamma")) {
|
||||
tensor_storage.reverse_ne();
|
||||
tensor_storage.n_dims = 1;
|
||||
tensor_storage.reverse_ne();
|
||||
}
|
||||
|
||||
tensor_storage.name = new_name;
|
||||
|
||||
if (new_name.find("cond_stage_model") != std::string::npos &&
|
||||
|
|
@ -1055,10 +1086,38 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
|
|||
|
||||
gguf_context* ctx_gguf_ = NULL;
|
||||
ggml_context* ctx_meta_ = NULL;
|
||||
ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
|
||||
|
||||
ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
|
||||
if (!ctx_gguf_) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
return false;
|
||||
LOG_ERROR("failed to open '%s' with gguf_init_from_file. Try to open it with GGUFReader.", file_path.c_str());
|
||||
GGUFReader gguf_reader;
|
||||
if (!gguf_reader.load(file_path)) {
|
||||
LOG_ERROR("failed to open '%s' with GGUFReader.", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t data_offset = gguf_reader.data_offset();
|
||||
for (const auto& gguf_tensor_info : gguf_reader.tensors()) {
|
||||
std::string name = gguf_tensor_info.name;
|
||||
if (!starts_with(name, prefix)) {
|
||||
name = prefix + name;
|
||||
}
|
||||
|
||||
TensorStorage tensor_storage(
|
||||
name,
|
||||
gguf_tensor_info.type,
|
||||
gguf_tensor_info.shape.data(),
|
||||
gguf_tensor_info.shape.size(),
|
||||
file_index,
|
||||
data_offset + gguf_tensor_info.offset);
|
||||
|
||||
// LOG_DEBUG("%s %s", name.c_str(), tensor_storage.to_string().c_str());
|
||||
|
||||
tensor_storages.push_back(tensor_storage);
|
||||
add_preprocess_tensor_storage_types(tensor_storages_types, tensor_storage.name, tensor_storage.type);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int n_tensors = gguf_get_n_tensors(ctx_gguf_);
|
||||
|
|
@ -1072,7 +1131,11 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
|
|||
|
||||
// LOG_DEBUG("%s", name.c_str());
|
||||
|
||||
TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
|
||||
if (!starts_with(name, prefix)) {
|
||||
name = prefix + name;
|
||||
}
|
||||
|
||||
TensorStorage tensor_storage(name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
|
||||
|
||||
GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());
|
||||
|
||||
|
|
@ -1110,7 +1173,7 @@ ggml_type str_to_ggml_type(const std::string& dtype) {
|
|||
|
||||
// https://huggingface.co/docs/safetensors/index
|
||||
bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const std::string& prefix) {
|
||||
LOG_DEBUG("init from '%s'", file_path.c_str());
|
||||
LOG_DEBUG("init from '%s', prefix = '%s'", file_path.c_str(), prefix.c_str());
|
||||
file_paths_.push_back(file_path);
|
||||
size_t file_index = file_paths_.size() - 1;
|
||||
#ifdef _WIN32
|
||||
|
|
@ -1180,6 +1243,10 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
|
|||
std::string dtype = tensor_info["dtype"];
|
||||
nlohmann::json shape = tensor_info["shape"];
|
||||
|
||||
if (dtype == "U8") {
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t begin = tensor_info["data_offsets"][0].get<size_t>();
|
||||
size_t end = tensor_info["data_offsets"][1].get<size_t>();
|
||||
|
||||
|
|
@ -1201,12 +1268,11 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
|
|||
}
|
||||
|
||||
if (n_dims == 5) {
|
||||
if (ne[3] == 1 && ne[4] == 1) {
|
||||
n_dims = 4;
|
||||
} else {
|
||||
LOG_ERROR("invalid tensor '%s'", name.c_str());
|
||||
return false;
|
||||
}
|
||||
n_dims = 4;
|
||||
ne[0] = ne[0] * ne[1];
|
||||
ne[1] = ne[2];
|
||||
ne[2] = ne[3];
|
||||
ne[3] = ne[4];
|
||||
}
|
||||
|
||||
// ggml_n_dims returns 1 for scalars
|
||||
|
|
@ -1214,7 +1280,11 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
|
|||
n_dims = 1;
|
||||
}
|
||||
|
||||
TensorStorage tensor_storage(prefix + name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
|
||||
if (!starts_with(name, prefix)) {
|
||||
name = prefix + name;
|
||||
}
|
||||
|
||||
TensorStorage tensor_storage(name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
|
||||
tensor_storage.reverse_ne();
|
||||
|
||||
size_t tensor_data_size = end - begin;
|
||||
|
|
@ -1599,7 +1669,11 @@ bool ModelLoader::parse_data_pkl(uint8_t* buffer,
|
|||
reader.tensor_storage.file_index = file_index;
|
||||
// if(strcmp(prefix.c_str(), "scarlett") == 0)
|
||||
// printf(" ZIP got tensor %s \n ", reader.tensor_storage.name.c_str());
|
||||
reader.tensor_storage.name = prefix + reader.tensor_storage.name;
|
||||
std::string name = reader.tensor_storage.name;
|
||||
if (!starts_with(name, prefix)) {
|
||||
name = prefix + name;
|
||||
}
|
||||
reader.tensor_storage.name = name;
|
||||
tensor_storages.push_back(reader.tensor_storage);
|
||||
add_preprocess_tensor_storage_types(tensor_storages_types, reader.tensor_storage.name, reader.tensor_storage.type);
|
||||
|
||||
|
|
@ -1681,12 +1755,14 @@ SDVersion ModelLoader::get_sd_version() {
|
|||
bool has_multiple_encoders = false;
|
||||
bool is_unet = false;
|
||||
|
||||
bool is_xl = false;
|
||||
bool is_flux = false;
|
||||
bool is_xl = false;
|
||||
bool is_flux = false;
|
||||
bool is_wan = false;
|
||||
int64_t patch_embedding_channels = 0;
|
||||
bool has_img_emb = false;
|
||||
|
||||
#define found_family (is_xl || is_flux)
|
||||
for (auto& tensor_storage : tensor_storages) {
|
||||
if (!found_family) {
|
||||
if (!(is_xl || is_flux)) {
|
||||
if (tensor_storage.name.find("model.diffusion_model.double_blocks.") != std::string::npos) {
|
||||
is_flux = true;
|
||||
if (input_block_checked) {
|
||||
|
|
@ -1696,6 +1772,15 @@ SDVersion ModelLoader::get_sd_version() {
|
|||
if (tensor_storage.name.find("model.diffusion_model.joint_blocks.") != std::string::npos) {
|
||||
return VERSION_SD3;
|
||||
}
|
||||
if (tensor_storage.name.find("model.diffusion_model.blocks.0.cross_attn.norm_k.weight") != std::string::npos) {
|
||||
is_wan = true;
|
||||
}
|
||||
if (tensor_storage.name.find("model.diffusion_model.patch_embedding.weight") != std::string::npos) {
|
||||
patch_embedding_channels = tensor_storage.ne[3];
|
||||
}
|
||||
if (tensor_storage.name.find("model.diffusion_model.img_emb") != std::string::npos) {
|
||||
has_img_emb = true;
|
||||
}
|
||||
if (tensor_storage.name.find("model.diffusion_model.input_blocks.") != std::string::npos || tensor_storage.name.find("unet.down_blocks.") != std::string::npos) {
|
||||
is_unet = true;
|
||||
if (has_multiple_encoders) {
|
||||
|
|
@ -1730,11 +1815,21 @@ SDVersion ModelLoader::get_sd_version() {
|
|||
if (tensor_storage.name == "model.diffusion_model.input_blocks.0.0.weight" || tensor_storage.name == "model.diffusion_model.img_in.weight" || tensor_storage.name == "unet.conv_in.weight") {
|
||||
input_block_weight = tensor_storage;
|
||||
input_block_checked = true;
|
||||
if (found_family) {
|
||||
if (is_xl || is_flux) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_wan) {
|
||||
LOG_DEBUG("patch_embedding_channels %d", patch_embedding_channels);
|
||||
if (patch_embedding_channels == 184320 && !has_img_emb) {
|
||||
return VERSION_WAN2_2_I2V;
|
||||
}
|
||||
if (patch_embedding_channels == 147456 && !has_img_emb) {
|
||||
return VERSION_WAN2_2_TI2V;
|
||||
}
|
||||
return VERSION_WAN2;
|
||||
}
|
||||
bool is_inpaint = input_block_weight.ne[2] == 9;
|
||||
bool is_ip2p = input_block_weight.ne[2] == 8;
|
||||
if (is_xl) {
|
||||
|
|
@ -1890,242 +1985,368 @@ std::string ModelLoader::load_t5_tokenizer_json() {
|
|||
return json_str;
|
||||
}
|
||||
|
||||
std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& vec) {
|
||||
std::vector<TensorStorage> res;
|
||||
std::unordered_map<std::string, size_t> name_to_index_map;
|
||||
std::string ModelLoader::load_umt5_tokenizer_json() {
|
||||
std::string json_str(reinterpret_cast<const char*>(umt5_tokenizer_json_str), sizeof(umt5_tokenizer_json_str));
|
||||
return json_str;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < vec.size(); ++i) {
|
||||
const std::string& current_name = vec[i].name;
|
||||
auto it = name_to_index_map.find(current_name);
|
||||
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p) {
|
||||
int64_t process_time_ms = 0;
|
||||
std::atomic<int64_t> read_time_ms(0);
|
||||
std::atomic<int64_t> memcpy_time_ms(0);
|
||||
std::atomic<int64_t> copy_to_backend_time_ms(0);
|
||||
std::atomic<int64_t> convert_time_ms(0);
|
||||
|
||||
if (it != name_to_index_map.end()) {
|
||||
res[it->second] = vec[i];
|
||||
} else {
|
||||
name_to_index_map[current_name] = i;
|
||||
res.push_back(vec[i]);
|
||||
int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores();
|
||||
LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
|
||||
|
||||
int64_t start_time = ggml_time_ms();
|
||||
std::vector<TensorStorage> processed_tensor_storages;
|
||||
|
||||
{
|
||||
struct IndexedStorage {
|
||||
size_t index;
|
||||
TensorStorage ts;
|
||||
};
|
||||
|
||||
std::mutex vec_mutex;
|
||||
std::vector<IndexedStorage> all_results;
|
||||
|
||||
int n_threads = std::min(num_threads_to_use, (int)tensor_storages.size());
|
||||
if (n_threads < 1) {
|
||||
n_threads = 1;
|
||||
}
|
||||
std::vector<std::thread> workers;
|
||||
|
||||
for (int i = 0; i < n_threads; ++i) {
|
||||
workers.emplace_back([&, thread_id = i]() {
|
||||
std::vector<IndexedStorage> local_results;
|
||||
std::vector<TensorStorage> temp_storages;
|
||||
|
||||
for (size_t j = thread_id; j < tensor_storages.size(); j += n_threads) {
|
||||
const auto& tensor_storage = tensor_storages[j];
|
||||
if (is_unused_tensor(tensor_storage.name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
temp_storages.clear();
|
||||
preprocess_tensor(tensor_storage, temp_storages);
|
||||
|
||||
for (const auto& ts : temp_storages) {
|
||||
local_results.push_back({j, ts});
|
||||
}
|
||||
}
|
||||
|
||||
if (!local_results.empty()) {
|
||||
std::lock_guard<std::mutex> lock(vec_mutex);
|
||||
all_results.insert(all_results.end(),
|
||||
local_results.begin(), local_results.end());
|
||||
}
|
||||
});
|
||||
}
|
||||
for (auto& w : workers) {
|
||||
w.join();
|
||||
}
|
||||
|
||||
std::vector<IndexedStorage> deduplicated;
|
||||
deduplicated.reserve(all_results.size());
|
||||
std::unordered_map<std::string, size_t> name_to_pos;
|
||||
for (auto& entry : all_results) {
|
||||
auto it = name_to_pos.find(entry.ts.name);
|
||||
if (it == name_to_pos.end()) {
|
||||
name_to_pos.emplace(entry.ts.name, deduplicated.size());
|
||||
deduplicated.push_back(entry);
|
||||
} else if (deduplicated[it->second].index < entry.index) {
|
||||
deduplicated[it->second] = entry;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(deduplicated.begin(), deduplicated.end(), [](const IndexedStorage& a, const IndexedStorage& b) {
|
||||
return a.index < b.index;
|
||||
});
|
||||
|
||||
processed_tensor_storages.reserve(deduplicated.size());
|
||||
for (auto& entry : deduplicated) {
|
||||
processed_tensor_storages.push_back(entry.ts);
|
||||
}
|
||||
}
|
||||
|
||||
// vec.resize(name_to_index_map.size());
|
||||
process_time_ms = ggml_time_ms() - start_time;
|
||||
|
||||
return res;
|
||||
}
|
||||
bool success = true;
|
||||
size_t total_tensors_processed = 0;
|
||||
const size_t total_tensors_to_process = processed_tensor_storages.size();
|
||||
const int64_t t_start = ggml_time_ms();
|
||||
int last_n_threads = 1;
|
||||
|
||||
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend) {
|
||||
std::vector<TensorStorage> processed_tensor_storages;
|
||||
for (auto& tensor_storage : tensor_storages) {
|
||||
// LOG_DEBUG("%s", name.c_str());
|
||||
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
|
||||
std::string file_path = file_paths_[file_index];
|
||||
LOG_DEBUG("loading tensors from %s", file_path.c_str());
|
||||
|
||||
if (is_unused_tensor(tensor_storage.name)) {
|
||||
std::vector<const TensorStorage*> file_tensors;
|
||||
for (const auto& ts : processed_tensor_storages) {
|
||||
if (ts.file_index == file_index) {
|
||||
file_tensors.push_back(&ts);
|
||||
}
|
||||
}
|
||||
if (file_tensors.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
preprocess_tensor(tensor_storage, processed_tensor_storages);
|
||||
}
|
||||
std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
|
||||
processed_tensor_storages = dedup;
|
||||
|
||||
bool success = true;
|
||||
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
|
||||
std::string file_path = file_paths_[file_index];
|
||||
LOG_DEBUG("loading tensors from %s\n", file_path.c_str());
|
||||
|
||||
#ifdef _WIN32
|
||||
std::filesystem::path fpath = std::filesystem::u8path(file_path);
|
||||
#else
|
||||
std::filesystem::path fpath = std::filesystem::path(file_path);
|
||||
#endif
|
||||
std::ifstream file(fpath, std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_zip = false;
|
||||
for (auto& tensor_storage : tensor_storages) {
|
||||
if (tensor_storage.file_index != file_index) {
|
||||
continue;
|
||||
}
|
||||
if (tensor_storage.index_in_zip >= 0) {
|
||||
for (auto const& ts : file_tensors) {
|
||||
if (ts->index_in_zip >= 0) {
|
||||
is_zip = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
struct zip_t* zip = NULL;
|
||||
if (is_zip) {
|
||||
zip = zip_open(file_path.c_str(), 0, 'r');
|
||||
if (zip == NULL) {
|
||||
LOG_ERROR("failed to open zip '%s'", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
|
||||
if (n_threads < 1) {
|
||||
n_threads = 1;
|
||||
}
|
||||
last_n_threads = n_threads;
|
||||
|
||||
std::atomic<size_t> tensor_idx(0);
|
||||
std::atomic<bool> failed(false);
|
||||
std::vector<std::thread> workers;
|
||||
|
||||
for (int i = 0; i < n_threads; ++i) {
|
||||
workers.emplace_back([&, file_path, is_zip]() {
|
||||
std::ifstream file;
|
||||
struct zip_t* zip = NULL;
|
||||
if (is_zip) {
|
||||
zip = zip_open(file_path.c_str(), 0, 'r');
|
||||
if (zip == NULL) {
|
||||
LOG_ERROR("failed to open zip '%s'", file_path.c_str());
|
||||
failed = true;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// kcpp
|
||||
#ifdef _WIN32
|
||||
std::filesystem::path fpath = std::filesystem::u8path(file_path);
|
||||
#else
|
||||
std::filesystem::path fpath = std::filesystem::path(file_path);
|
||||
#endif
|
||||
file.open(fpath, std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
failed = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint8_t> read_buffer;
|
||||
std::vector<uint8_t> convert_buffer;
|
||||
|
||||
while (true) {
|
||||
int64_t t0, t1;
|
||||
size_t idx = tensor_idx.fetch_add(1);
|
||||
if (idx >= file_tensors.size() || failed) {
|
||||
break;
|
||||
}
|
||||
|
||||
const TensorStorage& tensor_storage = *file_tensors[idx];
|
||||
ggml_tensor* dst_tensor = NULL;
|
||||
|
||||
t0 = ggml_time_ms();
|
||||
|
||||
if (!on_new_tensor_cb(tensor_storage, &dst_tensor)) {
|
||||
LOG_WARN("process tensor failed: '%s'", tensor_storage.name.c_str());
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (dst_tensor == NULL) {
|
||||
t1 = ggml_time_ms();
|
||||
read_time_ms.fetch_add(t1 - t0);
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t nbytes_to_read = tensor_storage.nbytes_to_read();
|
||||
|
||||
auto read_data = [&](char* buf, size_t n) {
|
||||
if (zip != NULL) {
|
||||
zip_entry_openbyindex(zip, tensor_storage.index_in_zip);
|
||||
size_t entry_size = zip_entry_size(zip);
|
||||
if (entry_size != n) {
|
||||
int64_t t_memcpy_start;
|
||||
read_buffer.resize(entry_size);
|
||||
zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
|
||||
t_memcpy_start = ggml_time_ms();
|
||||
memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
|
||||
memcpy_time_ms.fetch_add(ggml_time_ms() - t_memcpy_start);
|
||||
} else {
|
||||
zip_entry_noallocread(zip, (void*)buf, n);
|
||||
}
|
||||
zip_entry_close(zip);
|
||||
} else {
|
||||
file.seekg(tensor_storage.offset);
|
||||
file.read(buf, n);
|
||||
if (!file) {
|
||||
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (dst_tensor->buffer == NULL || ggml_backend_buffer_is_host(dst_tensor->buffer)) {
|
||||
if (tensor_storage.type == dst_tensor->type) {
|
||||
GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
|
||||
if (tensor_storage.is_f64 || tensor_storage.is_i64) {
|
||||
read_buffer.resize(tensor_storage.nbytes_to_read());
|
||||
read_data((char*)read_buffer.data(), nbytes_to_read);
|
||||
} else {
|
||||
read_data((char*)dst_tensor->data, nbytes_to_read);
|
||||
}
|
||||
t1 = ggml_time_ms();
|
||||
read_time_ms.fetch_add(t1 - t0);
|
||||
|
||||
t0 = ggml_time_ms();
|
||||
if (tensor_storage.is_bf16) {
|
||||
// inplace op
|
||||
bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e4m3) {
|
||||
// inplace op
|
||||
f8_e4m3_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e5m2) {
|
||||
// inplace op
|
||||
f8_e5m2_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f64) {
|
||||
f64_to_f32_vec((double*)read_buffer.data(), (float*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_i64) {
|
||||
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
|
||||
}
|
||||
t1 = ggml_time_ms();
|
||||
convert_time_ms.fetch_add(t1 - t0);
|
||||
} else {
|
||||
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
|
||||
read_data((char*)read_buffer.data(), nbytes_to_read);
|
||||
t1 = ggml_time_ms();
|
||||
read_time_ms.fetch_add(t1 - t0);
|
||||
|
||||
t0 = ggml_time_ms();
|
||||
if (tensor_storage.is_bf16) {
|
||||
// inplace op
|
||||
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e4m3) {
|
||||
// inplace op
|
||||
f8_e4m3_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e5m2) {
|
||||
// inplace op
|
||||
f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f64) {
|
||||
// inplace op
|
||||
f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_i64) {
|
||||
// inplace op
|
||||
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
}
|
||||
convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data, dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
|
||||
t1 = ggml_time_ms();
|
||||
convert_time_ms.fetch_add(t1 - t0);
|
||||
}
|
||||
} else {
|
||||
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
|
||||
read_data((char*)read_buffer.data(), nbytes_to_read);
|
||||
t1 = ggml_time_ms();
|
||||
read_time_ms.fetch_add(t1 - t0);
|
||||
|
||||
t0 = ggml_time_ms();
|
||||
if (tensor_storage.is_bf16) {
|
||||
// inplace op
|
||||
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e4m3) {
|
||||
// inplace op
|
||||
f8_e4m3_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e5m2) {
|
||||
// inplace op
|
||||
f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f64) {
|
||||
// inplace op
|
||||
f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_i64) {
|
||||
// inplace op
|
||||
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
}
|
||||
|
||||
if (tensor_storage.type == dst_tensor->type) {
|
||||
// copy to device memory
|
||||
t1 = ggml_time_ms();
|
||||
convert_time_ms.fetch_add(t1 - t0);
|
||||
t0 = ggml_time_ms();
|
||||
ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
|
||||
t1 = ggml_time_ms();
|
||||
copy_to_backend_time_ms.fetch_add(t1 - t0);
|
||||
} else {
|
||||
// convert first, then copy to device memory
|
||||
|
||||
convert_buffer.resize(ggml_nbytes(dst_tensor));
|
||||
convert_tensor((void*)read_buffer.data(), tensor_storage.type, (void*)convert_buffer.data(), dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
|
||||
t1 = ggml_time_ms();
|
||||
convert_time_ms.fetch_add(t1 - t0);
|
||||
t0 = ggml_time_ms();
|
||||
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
|
||||
t1 = ggml_time_ms();
|
||||
copy_to_backend_time_ms.fetch_add(t1 - t0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (zip != NULL) {
|
||||
zip_close(zip);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<uint8_t> read_buffer;
|
||||
std::vector<uint8_t> convert_buffer;
|
||||
|
||||
auto read_data = [&](const TensorStorage& tensor_storage, char* buf, size_t n) {
|
||||
if (zip != NULL) {
|
||||
zip_entry_openbyindex(zip, tensor_storage.index_in_zip);
|
||||
size_t entry_size = zip_entry_size(zip);
|
||||
if (entry_size != n) {
|
||||
read_buffer.resize(entry_size);
|
||||
zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
|
||||
memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
|
||||
} else {
|
||||
zip_entry_noallocread(zip, (void*)buf, n);
|
||||
}
|
||||
zip_entry_close(zip);
|
||||
} else {
|
||||
file.seekg(tensor_storage.offset);
|
||||
file.read(buf, n);
|
||||
if (!file) {
|
||||
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
int tensor_count = 0;
|
||||
int64_t t1 = ggml_time_ms();
|
||||
bool partial = false;
|
||||
for (auto& tensor_storage : processed_tensor_storages) {
|
||||
if (tensor_storage.file_index != file_index) {
|
||||
++tensor_count;
|
||||
continue;
|
||||
}
|
||||
ggml_tensor* dst_tensor = NULL;
|
||||
|
||||
success = on_new_tensor_cb(tensor_storage, &dst_tensor);
|
||||
if (!success) {
|
||||
LOG_WARN("process tensor failed: '%s'", tensor_storage.name.c_str());
|
||||
while (true) {
|
||||
size_t current_idx = tensor_idx.load();
|
||||
if (current_idx >= file_tensors.size() || failed) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (dst_tensor == NULL) {
|
||||
++tensor_count;
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t nbytes_to_read = tensor_storage.nbytes_to_read();
|
||||
|
||||
if (dst_tensor->buffer == NULL || ggml_backend_buffer_is_host(dst_tensor->buffer)) {
|
||||
// for the CPU and Metal backend, we can copy directly into the tensor
|
||||
if (tensor_storage.type == dst_tensor->type) {
|
||||
GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
|
||||
if (tensor_storage.is_f64 || tensor_storage.is_i64) {
|
||||
read_buffer.resize(tensor_storage.nbytes_to_read());
|
||||
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
|
||||
} else {
|
||||
read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
|
||||
}
|
||||
|
||||
if (tensor_storage.is_bf16) {
|
||||
// inplace op
|
||||
bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e4m3) {
|
||||
// inplace op
|
||||
f8_e4m3_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e5m2) {
|
||||
// inplace op
|
||||
f8_e5m2_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f64) {
|
||||
f64_to_f32_vec((double*)read_buffer.data(), (float*)dst_tensor->data, tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_i64) {
|
||||
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
|
||||
}
|
||||
} else {
|
||||
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
|
||||
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
|
||||
|
||||
if (tensor_storage.is_bf16) {
|
||||
// inplace op
|
||||
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e4m3) {
|
||||
// inplace op
|
||||
f8_e4m3_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e5m2) {
|
||||
// inplace op
|
||||
f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f64) {
|
||||
// inplace op
|
||||
f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_i64) {
|
||||
// inplace op
|
||||
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
}
|
||||
|
||||
convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
|
||||
dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
|
||||
}
|
||||
} else {
|
||||
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
|
||||
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
|
||||
|
||||
if (tensor_storage.is_bf16) {
|
||||
// inplace op
|
||||
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e4m3) {
|
||||
// inplace op
|
||||
f8_e4m3_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f8_e5m2) {
|
||||
// inplace op
|
||||
f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_f64) {
|
||||
// inplace op
|
||||
f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
|
||||
} else if (tensor_storage.is_i64) {
|
||||
// inplace op
|
||||
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
|
||||
}
|
||||
|
||||
if (tensor_storage.type == dst_tensor->type) {
|
||||
// copy to device memory
|
||||
ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
|
||||
} else {
|
||||
// convert first, then copy to device memory
|
||||
convert_buffer.resize(ggml_nbytes(dst_tensor));
|
||||
convert_tensor((void*)read_buffer.data(), tensor_storage.type,
|
||||
(void*)convert_buffer.data(), dst_tensor->type,
|
||||
(int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
|
||||
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
|
||||
}
|
||||
}
|
||||
size_t tensor_max = processed_tensor_storages.size();
|
||||
int64_t t2 = ggml_time_ms();
|
||||
// kcpp throttle progress printing
|
||||
++tensor_count;
|
||||
if(tensor_count<2 || tensor_count%5==0 || (tensor_count+10) > tensor_max)
|
||||
{
|
||||
pretty_progress(tensor_count, tensor_max, (t2 - t1) / 1000.0f);
|
||||
}
|
||||
t1 = t2;
|
||||
partial = tensor_count != tensor_max;
|
||||
size_t curr_num = total_tensors_processed + current_idx;
|
||||
pretty_progress(curr_num, total_tensors_to_process, (ggml_time_ms() - t_start) / 1000.0f / (curr_num + 1e-6f));
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
||||
}
|
||||
|
||||
if (zip != NULL) {
|
||||
zip_close(zip);
|
||||
for (auto& w : workers) {
|
||||
w.join();
|
||||
}
|
||||
|
||||
if (partial) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
if (failed) {
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
total_tensors_processed += file_tensors.size();
|
||||
pretty_progress(total_tensors_processed, total_tensors_to_process, (ggml_time_ms() - t_start) / 1000.0f / (total_tensors_processed + 1e-6f));
|
||||
if (total_tensors_processed < total_tensors_to_process) {
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
int64_t end_time = ggml_time_ms();
|
||||
LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
|
||||
(end_time - start_time) / 1000.f,
|
||||
process_time_ms / 1000.f,
|
||||
(read_time_ms.load() / (float)last_n_threads) / 1000.f,
|
||||
(memcpy_time_ms.load() / (float)last_n_threads) / 1000.f,
|
||||
(convert_time_ms.load() / (float)last_n_threads) / 1000.f,
|
||||
(copy_to_backend_time_ms.load() / (float)last_n_threads) / 1000.f);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
||||
ggml_backend_t backend,
|
||||
std::set<std::string> ignore_tensors) {
|
||||
std::set<std::string> ignore_tensors,
|
||||
int n_threads) {
|
||||
std::set<std::string> tensor_names_in_file;
|
||||
std::mutex tensor_names_mutex;
|
||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||
const std::string& name = tensor_storage.name;
|
||||
// LOG_DEBUG("%s", tensor_storage.to_string().c_str());
|
||||
tensor_names_in_file.insert(name);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(tensor_names_mutex);
|
||||
tensor_names_in_file.insert(name);
|
||||
}
|
||||
|
||||
struct ggml_tensor* real;
|
||||
if (tensors.find(name) != tensors.end()) {
|
||||
|
|
@ -2159,7 +2380,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
|
|||
return true;
|
||||
};
|
||||
|
||||
bool success = load_tensors(on_new_tensor_cb, backend);
|
||||
bool success = load_tensors(on_new_tensor_cb, n_threads);
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from file failed");
|
||||
return false;
|
||||
|
|
@ -2190,7 +2411,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
|
|||
|
||||
std::vector<std::pair<std::string, ggml_type>> parse_tensor_type_rules(const std::string& tensor_type_rules) {
|
||||
std::vector<std::pair<std::string, ggml_type>> result;
|
||||
for (const auto& item : splitString(tensor_type_rules, ',')) {
|
||||
for (const auto& item : split_string(tensor_type_rules, ',')) {
|
||||
if (item.size() == 0)
|
||||
continue;
|
||||
std::string::size_type pos = item.find('=');
|
||||
|
|
@ -2206,7 +2427,7 @@ std::vector<std::pair<std::string, ggml_type>> parse_tensor_type_rules(const std
|
|||
if (type_name == "f32") {
|
||||
tensor_type = GGML_TYPE_F32;
|
||||
} else {
|
||||
for (size_t i = 0; i < SD_TYPE_COUNT; i++) {
|
||||
for (size_t i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||
auto trait = ggml_get_type_traits((ggml_type)i);
|
||||
if (trait->to_float && trait->type_size && type_name == trait->type_name) {
|
||||
tensor_type = (ggml_type)i;
|
||||
|
|
@ -2247,6 +2468,8 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
|
|||
// Pass, do not convert. For MMDiT
|
||||
} else if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
|
||||
// Pass, do not convert. For Unet
|
||||
} else if (contains(name, "embedding")) {
|
||||
// Pass, do not convert embedding
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -2266,6 +2489,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
|
|||
|
||||
auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str);
|
||||
|
||||
std::mutex tensor_mutex;
|
||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||
const std::string& name = tensor_storage.name;
|
||||
ggml_type tensor_type = tensor_storage.type;
|
||||
|
|
@ -2283,6 +2507,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
|
|||
tensor_type = dst_type;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(tensor_mutex);
|
||||
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
|
||||
if (tensor == NULL) {
|
||||
LOG_ERROR("ggml_new_tensor failed");
|
||||
|
|
@ -2303,7 +2528,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
|
|||
return true;
|
||||
};
|
||||
|
||||
bool success = load_tensors(on_new_tensor_cb, backend);
|
||||
bool success = load_tensors(on_new_tensor_cb);
|
||||
ggml_backend_free(backend);
|
||||
LOG_INFO("load tensors done");
|
||||
LOG_INFO("trying to save tensors to %s", file_path.c_str());
|
||||
|
|
|
|||
|
|
@ -31,23 +31,12 @@ enum SDVersion {
|
|||
VERSION_SD3,
|
||||
VERSION_FLUX,
|
||||
VERSION_FLUX_FILL,
|
||||
VERSION_WAN2,
|
||||
VERSION_WAN2_2_I2V,
|
||||
VERSION_WAN2_2_TI2V,
|
||||
VERSION_COUNT,
|
||||
};
|
||||
|
||||
static inline bool sd_version_is_flux(SDVersion version) {
|
||||
if (version == VERSION_FLUX || version == VERSION_FLUX_FILL) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool sd_version_is_sd3(SDVersion version) {
|
||||
if (version == VERSION_SD3) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool sd_version_is_sd1(SDVersion version) {
|
||||
if (version == VERSION_SD1 || version == VERSION_SD1_INPAINT || version == VERSION_SD1_PIX2PIX) {
|
||||
return true;
|
||||
|
|
@ -69,6 +58,27 @@ static inline bool sd_version_is_sdxl(SDVersion version) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool sd_version_is_sd3(SDVersion version) {
|
||||
if (version == VERSION_SD3) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool sd_version_is_flux(SDVersion version) {
|
||||
if (version == VERSION_FLUX || version == VERSION_FLUX_FILL) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool sd_version_is_wan(SDVersion version) {
|
||||
if (version == VERSION_WAN2 || version == VERSION_WAN2_2_I2V || version == VERSION_WAN2_2_TI2V) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool sd_version_is_inpaint(SDVersion version) {
|
||||
if (version == VERSION_SD1_INPAINT || version == VERSION_SD2_INPAINT || version == VERSION_SDXL_INPAINT || version == VERSION_FLUX_FILL) {
|
||||
return true;
|
||||
|
|
@ -77,7 +87,7 @@ static inline bool sd_version_is_inpaint(SDVersion version) {
|
|||
}
|
||||
|
||||
static inline bool sd_version_is_dit(SDVersion version) {
|
||||
if (sd_version_is_flux(version) || sd_version_is_sd3(version)) {
|
||||
if (sd_version_is_flux(version) || sd_version_is_sd3(version) || sd_version_is_wan(version)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -109,11 +119,11 @@ struct TensorStorage {
|
|||
|
||||
size_t file_index = 0;
|
||||
int index_in_zip = -1; // >= means stored in a zip file
|
||||
size_t offset = 0; // offset in file
|
||||
uint64_t offset = 0; // offset in file
|
||||
|
||||
TensorStorage() = default;
|
||||
|
||||
TensorStorage(const std::string& name, ggml_type type, int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
|
||||
TensorStorage(const std::string& name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
|
||||
: name(name), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
|
||||
for (int i = 0; i < n_dims; i++) {
|
||||
this->ne[i] = ne[i];
|
||||
|
|
@ -154,10 +164,10 @@ struct TensorStorage {
|
|||
|
||||
std::vector<TensorStorage> chunk(size_t n) {
|
||||
std::vector<TensorStorage> chunks;
|
||||
size_t chunk_size = nbytes_to_read() / n;
|
||||
uint64_t chunk_size = nbytes_to_read() / n;
|
||||
// printf("%d/%d\n", chunk_size, nbytes_to_read());
|
||||
reverse_ne();
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
TensorStorage chunk_i = *this;
|
||||
chunk_i.ne[0] = ne[0] / n;
|
||||
chunk_i.offset = offset + i * chunk_size;
|
||||
|
|
@ -238,10 +248,10 @@ public:
|
|||
ggml_type get_diffusion_model_wtype();
|
||||
ggml_type get_vae_wtype();
|
||||
void set_wtype_override(ggml_type wtype, std::string prefix = "");
|
||||
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend);
|
||||
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0);
|
||||
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
||||
ggml_backend_t backend,
|
||||
std::set<std::string> ignore_tensors = {});
|
||||
std::set<std::string> ignore_tensors = {},
|
||||
int n_threads = 0);
|
||||
|
||||
bool save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules);
|
||||
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
|
||||
|
|
@ -250,6 +260,7 @@ public:
|
|||
|
||||
static std::string load_merges();
|
||||
static std::string load_t5_tokenizer_json();
|
||||
static std::string load_umt5_tokenizer_json();
|
||||
};
|
||||
|
||||
#endif // __MODEL_H__
|
||||
|
|
|
|||
|
|
@ -42,41 +42,6 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
/*
|
||||
class QFormerPerceiver(nn.Module):
|
||||
def __init__(self, id_embeddings_dim, cross_attention_dim, num_tokens, embedding_dim=1024, use_residual=True, ratio=4):
|
||||
super().__init__()
|
||||
|
||||
self.num_tokens = num_tokens
|
||||
self.cross_attention_dim = cross_attention_dim
|
||||
self.use_residual = use_residual
|
||||
print(cross_attention_dim*num_tokens)
|
||||
self.token_proj = nn.Sequential(
|
||||
nn.Linear(id_embeddings_dim, id_embeddings_dim*ratio),
|
||||
nn.GELU(),
|
||||
nn.Linear(id_embeddings_dim*ratio, cross_attention_dim*num_tokens),
|
||||
)
|
||||
self.token_norm = nn.LayerNorm(cross_attention_dim)
|
||||
self.perceiver_resampler = FacePerceiverResampler(
|
||||
dim=cross_attention_dim,
|
||||
depth=4,
|
||||
dim_head=128,
|
||||
heads=cross_attention_dim // 128,
|
||||
embedding_dim=embedding_dim,
|
||||
output_dim=cross_attention_dim,
|
||||
ff_mult=4,
|
||||
)
|
||||
|
||||
def forward(self, x, last_hidden_state):
|
||||
x = self.token_proj(x)
|
||||
x = x.reshape(-1, self.num_tokens, self.cross_attention_dim)
|
||||
x = self.token_norm(x) # cls token
|
||||
out = self.perceiver_resampler(x, last_hidden_state) # retrieve from patch tokens
|
||||
if self.use_residual: # TODO: if use_residual is not true
|
||||
out = x + 1.0 * out
|
||||
return out
|
||||
*/
|
||||
|
||||
struct PMFeedForward : public GGMLBlock {
|
||||
// network hparams
|
||||
int dim;
|
||||
|
|
@ -122,17 +87,8 @@ public:
|
|||
int64_t ne[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
ne[i] = x->ne[i];
|
||||
// print_ggml_tensor(x, true, "PerceiverAttention reshape x 0: ");
|
||||
// printf("heads = %d \n", heads);
|
||||
// x = ggml_view_4d(ctx, x, x->ne[0], x->ne[1], heads, x->ne[2]/heads,
|
||||
// x->nb[1], x->nb[2], x->nb[3], 0);
|
||||
x = ggml_reshape_4d(ctx, x, x->ne[0] / heads, heads, x->ne[1], x->ne[2]);
|
||||
// x = ggml_view_4d(ctx, x, x->ne[0]/heads, heads, x->ne[1], x->ne[2],
|
||||
// x->nb[1], x->nb[2], x->nb[3], 0);
|
||||
// x = ggml_cont(ctx, x);
|
||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));
|
||||
// print_ggml_tensor(x, true, "PerceiverAttention reshape x 1: ");
|
||||
// x = ggml_reshape_4d(ctx, x, ne[0], heads, ne[1], ne[2]/heads);
|
||||
return x;
|
||||
}
|
||||
|
||||
|
|
@ -269,17 +225,6 @@ public:
|
|||
4));
|
||||
}
|
||||
|
||||
/*
|
||||
def forward(self, x, last_hidden_state):
|
||||
x = self.token_proj(x)
|
||||
x = x.reshape(-1, self.num_tokens, self.cross_attention_dim)
|
||||
x = self.token_norm(x) # cls token
|
||||
out = self.perceiver_resampler(x, last_hidden_state) # retrieve from patch tokens
|
||||
if self.use_residual: # TODO: if use_residual is not true
|
||||
out = x + 1.0 * out
|
||||
return out
|
||||
*/
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* last_hidden_state) {
|
||||
|
|
@ -299,113 +244,6 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
/*
|
||||
class FacePerceiverResampler(torch.nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
dim=768,
|
||||
depth=4,
|
||||
dim_head=64,
|
||||
heads=16,
|
||||
embedding_dim=1280,
|
||||
output_dim=768,
|
||||
ff_mult=4,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self.proj_in = torch.nn.Linear(embedding_dim, dim)
|
||||
self.proj_out = torch.nn.Linear(dim, output_dim)
|
||||
self.norm_out = torch.nn.LayerNorm(output_dim)
|
||||
self.layers = torch.nn.ModuleList([])
|
||||
for _ in range(depth):
|
||||
self.layers.append(
|
||||
torch.nn.ModuleList(
|
||||
[
|
||||
PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads),
|
||||
FeedForward(dim=dim, mult=ff_mult),
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
def forward(self, latents, x):
|
||||
x = self.proj_in(x)
|
||||
for attn, ff in self.layers:
|
||||
latents = attn(x, latents) + latents
|
||||
latents = ff(latents) + latents
|
||||
latents = self.proj_out(latents)
|
||||
return self.norm_out(latents)
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
def FeedForward(dim, mult=4):
|
||||
inner_dim = int(dim * mult)
|
||||
return nn.Sequential(
|
||||
nn.LayerNorm(dim),
|
||||
nn.Linear(dim, inner_dim, bias=False),
|
||||
nn.GELU(),
|
||||
nn.Linear(inner_dim, dim, bias=False),
|
||||
)
|
||||
|
||||
def reshape_tensor(x, heads):
|
||||
bs, length, width = x.shape
|
||||
# (bs, length, width) --> (bs, length, n_heads, dim_per_head)
|
||||
x = x.view(bs, length, heads, -1)
|
||||
# (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head)
|
||||
x = x.transpose(1, 2)
|
||||
# (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head)
|
||||
x = x.reshape(bs, heads, length, -1)
|
||||
return x
|
||||
|
||||
class PerceiverAttention(nn.Module):
|
||||
def __init__(self, *, dim, dim_head=64, heads=8):
|
||||
super().__init__()
|
||||
self.scale = dim_head**-0.5
|
||||
self.dim_head = dim_head
|
||||
self.heads = heads
|
||||
inner_dim = dim_head * heads
|
||||
|
||||
self.norm1 = nn.LayerNorm(dim)
|
||||
self.norm2 = nn.LayerNorm(dim)
|
||||
|
||||
self.to_q = nn.Linear(dim, inner_dim, bias=False)
|
||||
self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False)
|
||||
self.to_out = nn.Linear(inner_dim, dim, bias=False)
|
||||
|
||||
def forward(self, x, latents):
|
||||
"""
|
||||
Args:
|
||||
x (torch.Tensor): image features
|
||||
shape (b, n1, D)
|
||||
latent (torch.Tensor): latent features
|
||||
shape (b, n2, D)
|
||||
"""
|
||||
x = self.norm1(x)
|
||||
latents = self.norm2(latents)
|
||||
|
||||
b, l, _ = latents.shape
|
||||
|
||||
q = self.to_q(latents)
|
||||
kv_input = torch.cat((x, latents), dim=-2)
|
||||
k, v = self.to_kv(kv_input).chunk(2, dim=-1)
|
||||
|
||||
q = reshape_tensor(q, self.heads)
|
||||
k = reshape_tensor(k, self.heads)
|
||||
v = reshape_tensor(v, self.heads)
|
||||
|
||||
# attention
|
||||
scale = 1 / math.sqrt(math.sqrt(self.dim_head))
|
||||
weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards
|
||||
weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
|
||||
out = weight @ v
|
||||
|
||||
out = out.permute(0, 2, 1, 3).reshape(b, l, -1)
|
||||
|
||||
return self.to_out(out)
|
||||
|
||||
*/
|
||||
|
||||
struct FuseModule : public GGMLBlock {
|
||||
// network hparams
|
||||
int embed_dim;
|
||||
|
|
@ -425,31 +263,13 @@ public:
|
|||
auto mlp2 = std::dynamic_pointer_cast<FuseBlock>(blocks["mlp2"]);
|
||||
auto layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["layer_norm"]);
|
||||
|
||||
// print_ggml_tensor(id_embeds, true, "Fuseblock id_embeds: ");
|
||||
// print_ggml_tensor(prompt_embeds, true, "Fuseblock prompt_embeds: ");
|
||||
|
||||
// auto prompt_embeds0 = ggml_cont(ctx, ggml_permute(ctx, prompt_embeds, 2, 0, 1, 3));
|
||||
// auto id_embeds0 = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
|
||||
// print_ggml_tensor(id_embeds0, true, "Fuseblock id_embeds0: ");
|
||||
// print_ggml_tensor(prompt_embeds0, true, "Fuseblock prompt_embeds0: ");
|
||||
// concat is along dim 2
|
||||
// auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds0, id_embeds0, 2);
|
||||
auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds, id_embeds, 0);
|
||||
// print_ggml_tensor(stacked_id_embeds, true, "Fuseblock stacked_id_embeds 0: ");
|
||||
// stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 1, 2, 0, 3));
|
||||
// print_ggml_tensor(stacked_id_embeds, true, "Fuseblock stacked_id_embeds 1: ");
|
||||
// stacked_id_embeds = mlp1.forward(ctx, stacked_id_embeds);
|
||||
// stacked_id_embeds = ggml_add(ctx, stacked_id_embeds, prompt_embeds);
|
||||
// stacked_id_embeds = mlp2.forward(ctx, stacked_id_embeds);
|
||||
// stacked_id_embeds = ggml_nn_layer_norm(ctx, stacked_id_embeds, ln_w, ln_b);
|
||||
|
||||
stacked_id_embeds = mlp1->forward(ctx, stacked_id_embeds);
|
||||
stacked_id_embeds = ggml_add(ctx, stacked_id_embeds, prompt_embeds);
|
||||
stacked_id_embeds = mlp2->forward(ctx, stacked_id_embeds);
|
||||
stacked_id_embeds = layer_norm->forward(ctx, stacked_id_embeds);
|
||||
|
||||
// print_ggml_tensor(stacked_id_embeds, true, "Fuseblock stacked_id_embeds 1: ");
|
||||
|
||||
return stacked_id_embeds;
|
||||
}
|
||||
|
||||
|
|
@ -464,21 +284,14 @@ public:
|
|||
|
||||
struct ggml_tensor* valid_id_embeds = id_embeds;
|
||||
// # slice out the image token embeddings
|
||||
// print_ggml_tensor(class_tokens_mask_pos, false);
|
||||
ggml_set_name(class_tokens_mask_pos, "class_tokens_mask_pos");
|
||||
ggml_set_name(prompt_embeds, "prompt_embeds");
|
||||
// print_ggml_tensor(valid_id_embeds, true, "valid_id_embeds");
|
||||
// print_ggml_tensor(class_tokens_mask_pos, true, "class_tokens_mask_pos");
|
||||
struct ggml_tensor* image_token_embeds = ggml_get_rows(ctx, prompt_embeds, class_tokens_mask_pos);
|
||||
ggml_set_name(image_token_embeds, "image_token_embeds");
|
||||
valid_id_embeds = ggml_reshape_2d(ctx, valid_id_embeds, valid_id_embeds->ne[0],
|
||||
ggml_nelements(valid_id_embeds) / valid_id_embeds->ne[0]);
|
||||
struct ggml_tensor* stacked_id_embeds = fuse_fn(ctx, image_token_embeds, valid_id_embeds);
|
||||
|
||||
// stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
|
||||
// print_ggml_tensor(stacked_id_embeds, true, "AA stacked_id_embeds");
|
||||
// print_ggml_tensor(left, true, "AA left");
|
||||
// print_ggml_tensor(right, true, "AA right");
|
||||
if (left && right) {
|
||||
stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds, 1);
|
||||
stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 1);
|
||||
|
|
@ -487,15 +300,12 @@ public:
|
|||
} else if (right) {
|
||||
stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 1);
|
||||
}
|
||||
// print_ggml_tensor(stacked_id_embeds, true, "BB stacked_id_embeds");
|
||||
// stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
|
||||
// print_ggml_tensor(stacked_id_embeds, true, "CC stacked_id_embeds");
|
||||
|
||||
class_tokens_mask = ggml_cont(ctx, ggml_transpose(ctx, class_tokens_mask));
|
||||
class_tokens_mask = ggml_repeat(ctx, class_tokens_mask, prompt_embeds);
|
||||
prompt_embeds = ggml_mul(ctx, prompt_embeds, class_tokens_mask);
|
||||
struct ggml_tensor* updated_prompt_embeds = ggml_add(ctx, prompt_embeds, stacked_id_embeds);
|
||||
ggml_set_name(updated_prompt_embeds, "updated_prompt_embeds");
|
||||
// print_ggml_tensor(updated_prompt_embeds, true, "updated_prompt_embeds: ");
|
||||
return updated_prompt_embeds;
|
||||
}
|
||||
};
|
||||
|
|
@ -508,6 +318,7 @@ struct PhotoMakerIDEncoderBlock : public CLIPVisionModelProjection {
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* id_pixel_values,
|
||||
struct ggml_tensor* prompt_embeds,
|
||||
struct ggml_tensor* class_tokens_mask,
|
||||
|
|
@ -520,9 +331,9 @@ struct PhotoMakerIDEncoderBlock : public CLIPVisionModelProjection {
|
|||
auto visual_projection_2 = std::dynamic_pointer_cast<Linear>(blocks["visual_projection_2"]);
|
||||
auto fuse_module = std::dynamic_pointer_cast<FuseModule>(blocks["fuse_module"]);
|
||||
|
||||
struct ggml_tensor* shared_id_embeds = vision_model->forward(ctx, id_pixel_values); // [N, hidden_size]
|
||||
struct ggml_tensor* id_embeds = visual_projection->forward(ctx, shared_id_embeds); // [N, proj_dim(768)]
|
||||
struct ggml_tensor* id_embeds_2 = visual_projection_2->forward(ctx, shared_id_embeds); // [N, 1280]
|
||||
struct ggml_tensor* shared_id_embeds = vision_model->forward(ctx, backend, id_pixel_values); // [N, hidden_size]
|
||||
struct ggml_tensor* id_embeds = visual_projection->forward(ctx, shared_id_embeds); // [N, proj_dim(768)]
|
||||
struct ggml_tensor* id_embeds_2 = visual_projection_2->forward(ctx, shared_id_embeds); // [N, 1280]
|
||||
|
||||
id_embeds = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
|
||||
id_embeds_2 = ggml_cont(ctx, ggml_permute(ctx, id_embeds_2, 2, 0, 1, 3));
|
||||
|
|
@ -550,35 +361,13 @@ struct PhotoMakerIDEncoder_CLIPInsightfaceExtendtokenBlock : public CLIPVisionMo
|
|||
num_tokens(2) {
|
||||
blocks["visual_projection_2"] = std::shared_ptr<GGMLBlock>(new Linear(1024, 1280, false));
|
||||
blocks["fuse_module"] = std::shared_ptr<GGMLBlock>(new FuseModule(2048));
|
||||
/*
|
||||
cross_attention_dim = 2048
|
||||
# projection
|
||||
self.num_tokens = 2
|
||||
self.cross_attention_dim = cross_attention_dim
|
||||
self.qformer_perceiver = QFormerPerceiver(
|
||||
id_embeddings_dim,
|
||||
cross_attention_dim,
|
||||
self.num_tokens,
|
||||
)*/
|
||||
blocks["qformer_perceiver"] = std::shared_ptr<GGMLBlock>(new QFormerPerceiver(id_embeddings_dim,
|
||||
cross_attention_dim,
|
||||
num_tokens));
|
||||
blocks["qformer_perceiver"] = std::shared_ptr<GGMLBlock>(new QFormerPerceiver(id_embeddings_dim,
|
||||
cross_attention_dim,
|
||||
num_tokens));
|
||||
}
|
||||
|
||||
/*
|
||||
def forward(self, id_pixel_values, prompt_embeds, class_tokens_mask, id_embeds):
|
||||
b, num_inputs, c, h, w = id_pixel_values.shape
|
||||
id_pixel_values = id_pixel_values.view(b * num_inputs, c, h, w)
|
||||
|
||||
last_hidden_state = self.vision_model(id_pixel_values)[0]
|
||||
id_embeds = id_embeds.view(b * num_inputs, -1)
|
||||
|
||||
id_embeds = self.qformer_perceiver(id_embeds, last_hidden_state)
|
||||
id_embeds = id_embeds.view(b, num_inputs, self.num_tokens, -1)
|
||||
updated_prompt_embeds = self.fuse_module(prompt_embeds, id_embeds, class_tokens_mask)
|
||||
*/
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* id_pixel_values,
|
||||
struct ggml_tensor* prompt_embeds,
|
||||
struct ggml_tensor* class_tokens_mask,
|
||||
|
|
@ -592,7 +381,7 @@ struct PhotoMakerIDEncoder_CLIPInsightfaceExtendtokenBlock : public CLIPVisionMo
|
|||
auto qformer_perceiver = std::dynamic_pointer_cast<QFormerPerceiver>(blocks["qformer_perceiver"]);
|
||||
|
||||
// struct ggml_tensor* last_hidden_state = vision_model->forward(ctx, id_pixel_values); // [N, hidden_size]
|
||||
struct ggml_tensor* last_hidden_state = vision_model->forward(ctx, id_pixel_values, false); // [N, hidden_size]
|
||||
struct ggml_tensor* last_hidden_state = vision_model->forward(ctx, backend, id_pixel_values, false); // [N, hidden_size]
|
||||
id_embeds = qformer_perceiver->forward(ctx, id_embeds, last_hidden_state);
|
||||
|
||||
struct ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,
|
||||
|
|
@ -624,12 +413,13 @@ public:
|
|||
|
||||
public:
|
||||
PhotoMakerIDEncoder(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types,
|
||||
const std::string prefix,
|
||||
SDVersion version = VERSION_SDXL,
|
||||
PMVersion pm_v = PM_VERSION_1,
|
||||
float sty = 20.f)
|
||||
: GGMLRunner(backend),
|
||||
: GGMLRunner(backend, offload_params_to_cpu),
|
||||
version(version),
|
||||
pm_version(pm_v),
|
||||
style_strength(sty) {
|
||||
|
|
@ -741,6 +531,7 @@ public:
|
|||
struct ggml_tensor* updated_prompt_embeds = NULL;
|
||||
if (pm_version == PM_VERSION_1)
|
||||
updated_prompt_embeds = id_encoder.forward(ctx0,
|
||||
runtime_backend,
|
||||
id_pixel_values_d,
|
||||
prompt_embeds_d,
|
||||
class_tokens_mask_d,
|
||||
|
|
@ -748,6 +539,7 @@ public:
|
|||
left, right);
|
||||
else if (pm_version == PM_VERSION_2)
|
||||
updated_prompt_embeds = id_encoder2.forward(ctx0,
|
||||
runtime_backend,
|
||||
id_pixel_values_d,
|
||||
prompt_embeds_d,
|
||||
class_tokens_mask_d,
|
||||
|
|
@ -785,10 +577,11 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||
bool applied = false;
|
||||
|
||||
PhotoMakerIDEmbed(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
ModelLoader* ml,
|
||||
const std::string& file_path = "",
|
||||
const std::string& prefix = "")
|
||||
: file_path(file_path), GGMLRunner(backend), model_loader(ml) {
|
||||
: file_path(file_path), GGMLRunner(backend, offload_params_to_cpu), model_loader(ml) {
|
||||
if (!model_loader->init_from_file(file_path, prefix)) {
|
||||
load_failed = true;
|
||||
}
|
||||
|
|
@ -798,7 +591,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||
return "id_embeds";
|
||||
}
|
||||
|
||||
bool load_from_file(bool filter_tensor = false) {
|
||||
bool load_from_file(bool filter_tensor, int n_threads) {
|
||||
LOG_INFO("loading PhotoMaker ID Embeds from '%s'", file_path.c_str());
|
||||
|
||||
if (load_failed) {
|
||||
|
|
@ -806,7 +599,8 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool dry_run = true;
|
||||
bool dry_run = true;
|
||||
std::mutex tensor_mutex;
|
||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||
const std::string& name = tensor_storage.name;
|
||||
|
||||
|
|
@ -815,6 +609,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||
return true;
|
||||
}
|
||||
if (dry_run) {
|
||||
std::lock_guard<std::mutex> lock(tensor_mutex);
|
||||
struct ggml_tensor* real = ggml_new_tensor(params_ctx,
|
||||
tensor_storage.type,
|
||||
tensor_storage.n_dims,
|
||||
|
|
@ -828,11 +623,11 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||
return true;
|
||||
};
|
||||
|
||||
model_loader->load_tensors(on_new_tensor_cb, backend);
|
||||
model_loader->load_tensors(on_new_tensor_cb, n_threads);
|
||||
alloc_params_buffer();
|
||||
|
||||
dry_run = false;
|
||||
model_loader->load_tensors(on_new_tensor_cb, backend);
|
||||
model_loader->load_tensors(on_new_tensor_cb, n_threads);
|
||||
|
||||
LOG_DEBUG("finished loading PhotoMaker ID Embeds ");
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -162,16 +162,16 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo
|
|||
}
|
||||
}
|
||||
|
||||
uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
||||
bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10
|
||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10MB
|
||||
params.mem_buffer = NULL;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
|
||||
if (!work_ctx) {
|
||||
LOG_ERROR("ggml_init() failed");
|
||||
return NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
float kX[9] = {
|
||||
|
|
@ -192,8 +192,8 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_thresh
|
|||
struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
|
||||
memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
|
||||
gaussian_kernel(gkernel);
|
||||
struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
||||
struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
|
||||
struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 3, 1);
|
||||
struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 1, 1);
|
||||
struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
|
||||
struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
|
||||
struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
|
||||
|
|
@ -209,8 +209,8 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_thresh
|
|||
non_max_supression(image_gray, G, tetha);
|
||||
threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
|
||||
// to RGB channels
|
||||
for (int iy = 0; iy < height; iy++) {
|
||||
for (int ix = 0; ix < width; ix++) {
|
||||
for (int iy = 0; iy < img.height; iy++) {
|
||||
for (int ix = 0; ix < img.width; ix++) {
|
||||
float gray = ggml_tensor_get_f32(image_gray, ix, iy);
|
||||
gray = inverse ? 1.0f - gray : gray;
|
||||
ggml_tensor_set_f32(image, gray, ix, iy);
|
||||
|
|
@ -218,10 +218,11 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_thresh
|
|||
ggml_tensor_set_f32(image, gray, ix, iy, 2);
|
||||
}
|
||||
}
|
||||
free(img);
|
||||
uint8_t* output = sd_tensor_to_image(image);
|
||||
free(img.data);
|
||||
img.data = output;
|
||||
ggml_free(work_ctx);
|
||||
return output;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // __PREPROCESSING_HPP__
|
||||
261
otherarch/sdcpp/rope.hpp
Normal file
261
otherarch/sdcpp/rope.hpp
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
#ifndef __ROPE_HPP__
|
||||
#define __ROPE_HPP__
|
||||
|
||||
#include <vector>
|
||||
#include "ggml_extend.hpp"
|
||||
|
||||
struct Rope {
|
||||
template <class T>
|
||||
static std::vector<T> linspace(T start, T end, int num) {
|
||||
std::vector<T> result(num);
|
||||
if (num == 1) {
|
||||
result[0] = start;
|
||||
return result;
|
||||
}
|
||||
T step = (end - start) / (num - 1);
|
||||
for (int i = 0; i < num; ++i) {
|
||||
result[i] = start + i * step;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::vector<std::vector<float>> transpose(const std::vector<std::vector<float>>& mat) {
|
||||
int rows = mat.size();
|
||||
int cols = mat[0].size();
|
||||
std::vector<std::vector<float>> transposed(cols, std::vector<float>(rows));
|
||||
for (int i = 0; i < rows; ++i) {
|
||||
for (int j = 0; j < cols; ++j) {
|
||||
transposed[j][i] = mat[i][j];
|
||||
}
|
||||
}
|
||||
return transposed;
|
||||
}
|
||||
|
||||
static std::vector<float> flatten(const std::vector<std::vector<float>>& vec) {
|
||||
std::vector<float> flat_vec;
|
||||
for (const auto& sub_vec : vec) {
|
||||
flat_vec.insert(flat_vec.end(), sub_vec.begin(), sub_vec.end());
|
||||
}
|
||||
return flat_vec;
|
||||
}
|
||||
|
||||
static std::vector<std::vector<float>> rope(const std::vector<float>& pos, int dim, int theta) {
|
||||
assert(dim % 2 == 0);
|
||||
int half_dim = dim / 2;
|
||||
|
||||
std::vector<float> scale = linspace(0.f, (dim * 1.f - 2) / dim, half_dim);
|
||||
|
||||
std::vector<float> omega(half_dim);
|
||||
for (int i = 0; i < half_dim; ++i) {
|
||||
omega[i] = 1.0 / std::pow(theta, scale[i]);
|
||||
}
|
||||
|
||||
int pos_size = pos.size();
|
||||
std::vector<std::vector<float>> out(pos_size, std::vector<float>(half_dim));
|
||||
for (int i = 0; i < pos_size; ++i) {
|
||||
for (int j = 0; j < half_dim; ++j) {
|
||||
out[i][j] = pos[i] * omega[j];
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> result(pos_size, std::vector<float>(half_dim * 4));
|
||||
for (int i = 0; i < pos_size; ++i) {
|
||||
for (int j = 0; j < half_dim; ++j) {
|
||||
result[i][4 * j] = std::cos(out[i][j]);
|
||||
result[i][4 * j + 1] = -std::sin(out[i][j]);
|
||||
result[i][4 * j + 2] = std::sin(out[i][j]);
|
||||
result[i][4 * j + 3] = std::cos(out[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Generate IDs for image patches and text
|
||||
static std::vector<std::vector<float>> gen_txt_ids(int bs, int context_len) {
|
||||
return std::vector<std::vector<float>>(bs * context_len, std::vector<float>(3, 0.0));
|
||||
}
|
||||
|
||||
static std::vector<std::vector<float>> gen_img_ids(int h, int w, int patch_size, int bs, int index = 0, int h_offset = 0, int w_offset = 0) {
|
||||
int h_len = (h + (patch_size / 2)) / patch_size;
|
||||
int w_len = (w + (patch_size / 2)) / patch_size;
|
||||
|
||||
std::vector<std::vector<float>> img_ids(h_len * w_len, std::vector<float>(3, 0.0));
|
||||
|
||||
std::vector<float> row_ids = linspace<float>(h_offset, h_len - 1 + h_offset, h_len);
|
||||
std::vector<float> col_ids = linspace<float>(w_offset, w_len - 1 + w_offset, w_len);
|
||||
|
||||
for (int i = 0; i < h_len; ++i) {
|
||||
for (int j = 0; j < w_len; ++j) {
|
||||
img_ids[i * w_len + j][0] = index;
|
||||
img_ids[i * w_len + j][1] = row_ids[i];
|
||||
img_ids[i * w_len + j][2] = col_ids[j];
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> img_ids_repeated(bs * img_ids.size(), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < img_ids.size(); ++j) {
|
||||
img_ids_repeated[i * img_ids.size() + j] = img_ids[j];
|
||||
}
|
||||
}
|
||||
return img_ids_repeated;
|
||||
}
|
||||
|
||||
static std::vector<std::vector<float>> concat_ids(const std::vector<std::vector<float>>& a,
|
||||
const std::vector<std::vector<float>>& b,
|
||||
int bs) {
|
||||
size_t a_len = a.size() / bs;
|
||||
size_t b_len = b.size() / bs;
|
||||
std::vector<std::vector<float>> ids(a.size() + b.size(), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < a_len; ++j) {
|
||||
ids[i * (a_len + b_len) + j] = a[i * a_len + j];
|
||||
}
|
||||
for (int j = 0; j < b_len; ++j) {
|
||||
ids[i * (a_len + b_len) + a_len + j] = b[i * b_len + j];
|
||||
}
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
static std::vector<float> embed_nd(const std::vector<std::vector<float>>& ids,
|
||||
int bs,
|
||||
int theta,
|
||||
const std::vector<int>& axes_dim) {
|
||||
std::vector<std::vector<float>> trans_ids = transpose(ids);
|
||||
size_t pos_len = ids.size() / bs;
|
||||
int num_axes = axes_dim.size();
|
||||
// for (int i = 0; i < pos_len; i++) {
|
||||
// std::cout << trans_ids[0][i] << " " << trans_ids[1][i] << " " << trans_ids[2][i] << std::endl;
|
||||
// }
|
||||
|
||||
int emb_dim = 0;
|
||||
for (int d : axes_dim)
|
||||
emb_dim += d / 2;
|
||||
|
||||
std::vector<std::vector<float>> emb(bs * pos_len, std::vector<float>(emb_dim * 2 * 2, 0.0));
|
||||
int offset = 0;
|
||||
for (int i = 0; i < num_axes; ++i) {
|
||||
std::vector<std::vector<float>> rope_emb = rope(trans_ids[i], axes_dim[i], theta); // [bs*pos_len, axes_dim[i]/2 * 2 * 2]
|
||||
for (int b = 0; b < bs; ++b) {
|
||||
for (int j = 0; j < pos_len; ++j) {
|
||||
for (int k = 0; k < rope_emb[0].size(); ++k) {
|
||||
emb[b * pos_len + j][offset + k] = rope_emb[j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
offset += rope_emb[0].size();
|
||||
}
|
||||
|
||||
return flatten(emb);
|
||||
}
|
||||
|
||||
static std::vector<std::vector<float>> gen_flux_ids(int h,
|
||||
int w,
|
||||
int patch_size,
|
||||
int bs,
|
||||
int context_len,
|
||||
std::vector<ggml_tensor*> ref_latents,
|
||||
bool increase_ref_index) {
|
||||
auto txt_ids = gen_txt_ids(bs, context_len);
|
||||
auto img_ids = gen_img_ids(h, w, patch_size, bs);
|
||||
|
||||
auto ids = concat_ids(txt_ids, img_ids, bs);
|
||||
uint64_t curr_h_offset = 0;
|
||||
uint64_t curr_w_offset = 0;
|
||||
int index = 1;
|
||||
for (ggml_tensor* ref : ref_latents) {
|
||||
uint64_t h_offset = 0;
|
||||
uint64_t w_offset = 0;
|
||||
if (!increase_ref_index) {
|
||||
if (ref->ne[1] + curr_h_offset > ref->ne[0] + curr_w_offset) {
|
||||
w_offset = curr_w_offset;
|
||||
} else {
|
||||
h_offset = curr_h_offset;
|
||||
}
|
||||
}
|
||||
|
||||
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, index, h_offset, w_offset);
|
||||
ids = concat_ids(ids, ref_ids, bs);
|
||||
|
||||
if (increase_ref_index) {
|
||||
index++;
|
||||
}
|
||||
|
||||
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
|
||||
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
// Generate flux positional embeddings
|
||||
static std::vector<float> gen_flux_pe(int h,
|
||||
int w,
|
||||
int patch_size,
|
||||
int bs,
|
||||
int context_len,
|
||||
std::vector<ggml_tensor*> ref_latents,
|
||||
bool increase_ref_index,
|
||||
int theta,
|
||||
const std::vector<int>& axes_dim) {
|
||||
std::vector<std::vector<float>> ids = gen_flux_ids(h, w, patch_size, bs, context_len, ref_latents, increase_ref_index);
|
||||
return embed_nd(ids, bs, theta, axes_dim);
|
||||
}
|
||||
|
||||
static std::vector<std::vector<float>> gen_vid_ids(int t,
|
||||
int h,
|
||||
int w,
|
||||
int pt,
|
||||
int ph,
|
||||
int pw,
|
||||
int bs,
|
||||
int t_offset = 0,
|
||||
int h_offset = 0,
|
||||
int w_offset = 0) {
|
||||
int t_len = (t + (pt / 2)) / pt;
|
||||
int h_len = (h + (ph / 2)) / ph;
|
||||
int w_len = (w + (pw / 2)) / pw;
|
||||
|
||||
std::vector<std::vector<float>> vid_ids(t_len * h_len * w_len, std::vector<float>(3, 0.0));
|
||||
|
||||
std::vector<float> t_ids = linspace<float>(t_offset, t_len - 1 + t_offset, t_len);
|
||||
std::vector<float> h_ids = linspace<float>(h_offset, h_len - 1 + h_offset, h_len);
|
||||
std::vector<float> w_ids = linspace<float>(w_offset, w_len - 1 + w_offset, w_len);
|
||||
|
||||
for (int i = 0; i < t_len; ++i) {
|
||||
for (int j = 0; j < h_len; ++j) {
|
||||
for (int k = 0; k < w_len; ++k) {
|
||||
int idx = i * h_len * w_len + j * w_len + k;
|
||||
vid_ids[idx][0] = t_ids[i];
|
||||
vid_ids[idx][1] = h_ids[j];
|
||||
vid_ids[idx][2] = w_ids[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> vid_ids_repeated(bs * vid_ids.size(), std::vector<float>(3));
|
||||
for (int i = 0; i < bs; ++i) {
|
||||
for (int j = 0; j < vid_ids.size(); ++j) {
|
||||
vid_ids_repeated[i * vid_ids.size() + j] = vid_ids[j];
|
||||
}
|
||||
}
|
||||
return vid_ids_repeated;
|
||||
}
|
||||
|
||||
// Generate wan positional embeddings
|
||||
static std::vector<float> gen_wan_pe(int t,
|
||||
int h,
|
||||
int w,
|
||||
int pt,
|
||||
int ph,
|
||||
int pw,
|
||||
int bs,
|
||||
int theta,
|
||||
const std::vector<int>& axes_dim) {
|
||||
std::vector<std::vector<float>> ids = gen_vid_ids(t, h, w, pt, ph, pw, bs);
|
||||
return embed_nd(ids, bs, theta, axes_dim);
|
||||
}
|
||||
}; // struct Rope
|
||||
|
||||
#endif // __ROPE_HPP__
|
||||
|
|
@ -64,7 +64,6 @@ struct SDParams {
|
|||
float strength = 0.75f;
|
||||
int64_t seed = 42;
|
||||
bool clip_on_cpu = false;
|
||||
bool vae_on_cpu = false;
|
||||
bool diffusion_flash_attn = false;
|
||||
bool diffusion_conv_direct = false;
|
||||
bool vae_conv_direct = false;
|
||||
|
|
@ -93,11 +92,6 @@ static bool sd_is_quiet = false;
|
|||
static std::string sdmodelfilename = "";
|
||||
static bool photomaker_enabled = false;
|
||||
|
||||
static void set_sd_vae_tiling(sd_ctx_t* ctx, bool tiling)
|
||||
{
|
||||
ctx->sd->vae_tiling = tiling;
|
||||
}
|
||||
|
||||
static int get_loaded_sd_version(sd_ctx_t* ctx)
|
||||
{
|
||||
return ctx->sd->version;
|
||||
|
|
@ -252,10 +246,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
params.diffusion_model_path = sd_params->diffusion_model_path.c_str();
|
||||
params.vae_path = sd_params->vae_path.c_str();
|
||||
params.taesd_path = sd_params->taesd_path.c_str();
|
||||
params.stacked_id_embed_dir = sd_params->stacked_id_embeddings_path.c_str();
|
||||
params.photo_maker_path = sd_params->stacked_id_embeddings_path.c_str();
|
||||
|
||||
params.vae_decode_only = false;
|
||||
params.vae_tiling = false;
|
||||
params.free_params_immediately = false;
|
||||
params.rng_type = CUDA_RNG;
|
||||
|
||||
|
|
@ -279,7 +272,7 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
<< "\nDIFFUSION:" << params.diffusion_model_path
|
||||
<< "\nVAE:" << params.vae_path
|
||||
<< "\nTAESD:" << params.taesd_path
|
||||
<< "\nPHOTOMAKER:" << params.stacked_id_embed_dir
|
||||
<< "\nPHOTOMAKER:" << params.photo_maker_path
|
||||
<< "\nTHREADS:" << params.n_threads
|
||||
<< "\nWTYPE:" << params.wtype
|
||||
<< "\nDIFFUSIONFLASHATTN:" << (params.diffusion_flash_attn ? 1 : 0)
|
||||
|
|
@ -338,12 +331,12 @@ static std::string get_image_params(const sd_img_gen_params_t & params) {
|
|||
parameter_string << std::setprecision(3)
|
||||
<< "Prompt: " << params.prompt
|
||||
<< " | NegativePrompt: " << params.negative_prompt
|
||||
<< " | Steps: " << params.sample_steps
|
||||
<< " | CFGScale: " << params.guidance.txt_cfg
|
||||
<< " | Guidance: " << params.guidance.distilled_guidance
|
||||
<< " | Steps: " << params.sample_params.sample_steps
|
||||
<< " | CFGScale: " << params.sample_params.guidance.txt_cfg
|
||||
<< " | Guidance: " << params.sample_params.guidance.distilled_guidance
|
||||
<< " | Seed: " << params.seed
|
||||
<< " | Size: " << params.width << "x" << params.height
|
||||
<< " | Sampler: " << sd_sample_method_name(params.sample_method)
|
||||
<< " | Sampler: " << sd_sample_method_name(params.sample_params.sample_method)
|
||||
<< " | Clip skip: " << params.clip_skip
|
||||
<< " | Model: " << sdmodelfilename
|
||||
<< " | Version: KoboldCpp";
|
||||
|
|
@ -569,7 +562,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
|
||||
// trigger tiling by image area, the memory used for the VAE buffer is 6656 bytes per image pixel, default 768x768
|
||||
bool dotile = (sd_params->width*sd_params->height > cfg_tiled_vae_threshold*cfg_tiled_vae_threshold);
|
||||
set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom
|
||||
|
||||
//for img2img
|
||||
sd_image_t input_image = {0,0,0,nullptr};
|
||||
|
|
@ -698,26 +690,27 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
sd_img_gen_params_t params = {};
|
||||
sd_img_gen_params_init (¶ms);
|
||||
|
||||
params.batch_count = 1;
|
||||
|
||||
params.prompt = sd_params->prompt.c_str();
|
||||
params.negative_prompt = sd_params->negative_prompt.c_str();
|
||||
params.clip_skip = sd_params->clip_skip;
|
||||
params.guidance.txt_cfg = sd_params->cfg_scale;
|
||||
params.guidance.img_cfg = sd_params->cfg_scale;
|
||||
params.sample_params.guidance.txt_cfg = sd_params->cfg_scale;
|
||||
params.sample_params.guidance.img_cfg = sd_params->cfg_scale;
|
||||
params.width = sd_params->width;
|
||||
params.height = sd_params->height;
|
||||
params.sample_method = sd_params->sample_method;
|
||||
params.sample_steps = sd_params->sample_steps;
|
||||
params.sample_params.sample_method = sd_params->sample_method;
|
||||
params.sample_params.sample_steps = sd_params->sample_steps;
|
||||
params.seed = sd_params->seed;
|
||||
params.strength = sd_params->strength;
|
||||
params.vae_tiling_params.enabled = dotile;
|
||||
params.batch_count = 1;
|
||||
params.input_id_images_path = "";
|
||||
|
||||
params.ref_images = reference_imgs.data();
|
||||
params.ref_images_count = reference_imgs.size();
|
||||
|
||||
kcpp_img_gen_params_t extra_params = {};
|
||||
extra_params.photomaker_references = photomaker_imgs.data();
|
||||
extra_params.photomaker_reference_count = photomaker_imgs.size();
|
||||
params.pm_params.id_images = photomaker_imgs.data();
|
||||
params.pm_params.id_images_count = photomaker_imgs.size();
|
||||
|
||||
if (!is_img2img) {
|
||||
|
||||
|
|
@ -727,10 +720,10 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
ss << "\nTXT2IMG PROMPT:" << params.prompt
|
||||
<< "\nNPROMPT:" << params.negative_prompt
|
||||
<< "\nCLPSKP:" << params.clip_skip
|
||||
<< "\nCFGSCLE:" << params.guidance.txt_cfg
|
||||
<< "\nCFGSCLE:" << params.sample_params.guidance.txt_cfg
|
||||
<< "\nSIZE:" << params.width << "x" << params.height
|
||||
<< "\nSM:" << sd_sample_method_name(params.sample_method)
|
||||
<< "\nSTEP:" << params.sample_steps
|
||||
<< "\nSM:" << sd_sample_method_name(params.sample_params.sample_method)
|
||||
<< "\nSTEP:" << params.sample_params.sample_steps
|
||||
<< "\nSEED:" << params.seed
|
||||
<< "\nBATCH:" << params.batch_count
|
||||
<< "\n\n";
|
||||
|
|
@ -739,7 +732,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
|
||||
fflush(stdout);
|
||||
|
||||
results = generate_image(sd_ctx, ¶ms, &extra_params);
|
||||
results = generate_image(sd_ctx, ¶ms);
|
||||
|
||||
} else {
|
||||
|
||||
|
|
@ -838,10 +831,10 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
ss << "\nnIMG2IMG PROMPT:" << params.prompt
|
||||
<< "\nNPROMPT:" << params.negative_prompt
|
||||
<< "\nCLPSKP:" << params.clip_skip
|
||||
<< "\nCFGSCLE:" << params.guidance.txt_cfg
|
||||
<< "\nCFGSCLE:" << params.sample_params.guidance.txt_cfg
|
||||
<< "\nSIZE:" << params.width << "x" << params.height
|
||||
<< "\nSM:" << sd_sample_method_name(params.sample_method)
|
||||
<< "\nSTEP:" << params.sample_steps
|
||||
<< "\nSM:" << sd_sample_method_name(params.sample_params.sample_method)
|
||||
<< "\nSTEP:" << params.sample_params.sample_steps
|
||||
<< "\nSEED:" << params.seed
|
||||
<< "\nSTRENGTH:" << params.strength
|
||||
<< "\nBATCH:" << params.batch_count
|
||||
|
|
@ -851,7 +844,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
|
||||
fflush(stdout);
|
||||
|
||||
results = generate_image(sd_ctx, ¶ms, &extra_params);
|
||||
results = generate_image(sd_ctx, ¶ms);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -35,7 +35,7 @@ enum rng_type_t {
|
|||
};
|
||||
|
||||
enum sample_method_t {
|
||||
EULER_A,
|
||||
SAMPLE_METHOD_DEFAULT,
|
||||
EULER,
|
||||
HEUN,
|
||||
DPM2,
|
||||
|
|
@ -47,16 +47,20 @@ enum sample_method_t {
|
|||
LCM,
|
||||
DDIM_TRAILING,
|
||||
TCD,
|
||||
EULER_A,
|
||||
SAMPLE_METHOD_COUNT
|
||||
};
|
||||
|
||||
enum schedule_t {
|
||||
enum scheduler_t {
|
||||
DEFAULT,
|
||||
DISCRETE,
|
||||
KARRAS,
|
||||
EXPONENTIAL,
|
||||
AYS,
|
||||
GITS,
|
||||
SGM_UNIFORM,
|
||||
SIMPLE,
|
||||
SMOOTHSTEP,
|
||||
SCHEDULE_COUNT
|
||||
};
|
||||
|
||||
|
|
@ -101,7 +105,8 @@ enum sd_type_t {
|
|||
// SD_TYPE_IQ4_NL_4_4 = 36,
|
||||
// SD_TYPE_IQ4_NL_4_8 = 37,
|
||||
// SD_TYPE_IQ4_NL_8_8 = 38,
|
||||
SD_TYPE_COUNT = 40,
|
||||
SD_TYPE_MXFP4 = 39, // MXFP4 (1 block)
|
||||
SD_TYPE_COUNT = 40,
|
||||
};
|
||||
|
||||
enum sd_log_level_t {
|
||||
|
|
@ -111,25 +116,35 @@ enum sd_log_level_t {
|
|||
SD_LOG_ERROR
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
bool enabled;
|
||||
int tile_size_x;
|
||||
int tile_size_y;
|
||||
float target_overlap;
|
||||
float rel_size_x;
|
||||
float rel_size_y;
|
||||
} sd_tiling_params_t;
|
||||
|
||||
typedef struct {
|
||||
const char* model_path;
|
||||
const char* clip_l_path;
|
||||
const char* clip_g_path;
|
||||
const char* clip_vision_path;
|
||||
const char* t5xxl_path;
|
||||
const char* diffusion_model_path;
|
||||
const char* high_noise_diffusion_model_path;
|
||||
const char* vae_path;
|
||||
const char* taesd_path;
|
||||
const char* control_net_path;
|
||||
const char* lora_model_dir;
|
||||
const char* embedding_dir;
|
||||
const char* stacked_id_embed_dir;
|
||||
const char* photo_maker_path;
|
||||
bool vae_decode_only;
|
||||
bool vae_tiling;
|
||||
bool free_params_immediately;
|
||||
int n_threads;
|
||||
enum sd_type_t wtype;
|
||||
enum rng_type_t rng_type;
|
||||
enum schedule_t schedule;
|
||||
bool offload_params_to_cpu;
|
||||
bool keep_clip_on_cpu;
|
||||
bool keep_control_net_on_cpu;
|
||||
bool keep_vae_on_cpu;
|
||||
|
|
@ -139,6 +154,7 @@ typedef struct {
|
|||
bool chroma_use_dit_mask;
|
||||
bool chroma_use_t5_mask;
|
||||
int chroma_t5_mask_pad;
|
||||
float flow_shift;
|
||||
} sd_ctx_params_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -159,53 +175,64 @@ typedef struct {
|
|||
typedef struct {
|
||||
float txt_cfg;
|
||||
float img_cfg;
|
||||
float min_cfg;
|
||||
float distilled_guidance;
|
||||
sd_slg_params_t slg;
|
||||
} sd_guidance_params_t;
|
||||
|
||||
typedef struct {
|
||||
const char* prompt;
|
||||
const char* negative_prompt;
|
||||
int clip_skip;
|
||||
sd_guidance_params_t guidance;
|
||||
sd_image_t init_image;
|
||||
sd_image_t* ref_images;
|
||||
int ref_images_count;
|
||||
sd_image_t mask_image;
|
||||
int width;
|
||||
int height;
|
||||
enum scheduler_t scheduler;
|
||||
enum sample_method_t sample_method;
|
||||
int sample_steps;
|
||||
float eta;
|
||||
int shifted_timestep;
|
||||
} sd_sample_params_t;
|
||||
|
||||
typedef struct {
|
||||
sd_image_t* id_images;
|
||||
int id_images_count;
|
||||
const char* id_embed_path;
|
||||
float style_strength;
|
||||
} sd_pm_params_t; // photo maker
|
||||
|
||||
typedef struct {
|
||||
const char* prompt;
|
||||
const char* negative_prompt;
|
||||
int clip_skip;
|
||||
sd_image_t init_image;
|
||||
sd_image_t* ref_images;
|
||||
int ref_images_count;
|
||||
bool increase_ref_index;
|
||||
sd_image_t mask_image;
|
||||
int width;
|
||||
int height;
|
||||
sd_sample_params_t sample_params;
|
||||
float strength;
|
||||
int64_t seed;
|
||||
int batch_count;
|
||||
const sd_image_t* control_cond;
|
||||
sd_image_t control_image;
|
||||
float control_strength;
|
||||
float style_strength;
|
||||
bool normalize_input;
|
||||
const char* input_id_images_path;
|
||||
sd_pm_params_t pm_params;
|
||||
sd_tiling_params_t vae_tiling_params;
|
||||
} sd_img_gen_params_t;
|
||||
|
||||
typedef struct {
|
||||
sd_image_t* photomaker_references;
|
||||
int photomaker_reference_count;
|
||||
} kcpp_img_gen_params_t;
|
||||
|
||||
typedef struct {
|
||||
const char* prompt;
|
||||
const char* negative_prompt;
|
||||
int clip_skip;
|
||||
sd_image_t init_image;
|
||||
sd_image_t end_image;
|
||||
sd_image_t* control_frames;
|
||||
int control_frames_size;
|
||||
int width;
|
||||
int height;
|
||||
sd_guidance_params_t guidance;
|
||||
enum sample_method_t sample_method;
|
||||
int sample_steps;
|
||||
sd_sample_params_t sample_params;
|
||||
sd_sample_params_t high_noise_sample_params;
|
||||
float moe_boundary;
|
||||
float strength;
|
||||
int64_t seed;
|
||||
int video_frames;
|
||||
int motion_bucket_id;
|
||||
int fps;
|
||||
float augmentation_level;
|
||||
float vace_strength;
|
||||
} sd_vid_gen_params_t;
|
||||
|
||||
typedef struct sd_ctx_t sd_ctx_t;
|
||||
|
|
@ -224,30 +251,37 @@ SD_API const char* sd_rng_type_name(enum rng_type_t rng_type);
|
|||
SD_API enum rng_type_t str_to_rng_type(const char* str);
|
||||
SD_API const char* sd_sample_method_name(enum sample_method_t sample_method);
|
||||
SD_API enum sample_method_t str_to_sample_method(const char* str);
|
||||
SD_API const char* sd_schedule_name(enum schedule_t schedule);
|
||||
SD_API enum schedule_t str_to_schedule(const char* str);
|
||||
SD_API const char* sd_schedule_name(enum scheduler_t scheduler);
|
||||
SD_API enum scheduler_t str_to_schedule(const char* str);
|
||||
|
||||
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
|
||||
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
|
||||
|
||||
SD_API sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params);
|
||||
SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
|
||||
SD_API enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx);
|
||||
|
||||
SD_API void sd_sample_params_init(sd_sample_params_t* sample_params);
|
||||
SD_API char* sd_sample_params_to_str(const sd_sample_params_t* sample_params);
|
||||
|
||||
SD_API void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params);
|
||||
SD_API char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params);
|
||||
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params, const kcpp_img_gen_params_t* kcpp_img_gen_params);
|
||||
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params);
|
||||
|
||||
SD_API void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params);
|
||||
SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* sd_vid_gen_params); // broken
|
||||
SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* sd_vid_gen_params, int* num_frames_out);
|
||||
|
||||
typedef struct upscaler_ctx_t upscaler_ctx_t;
|
||||
|
||||
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
||||
int n_threads,
|
||||
bool direct);
|
||||
bool offload_params_to_cpu,
|
||||
bool direct,
|
||||
int n_threads);
|
||||
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
|
||||
|
||||
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
|
||||
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx,
|
||||
sd_image_t input_image,
|
||||
uint32_t upscale_factor);
|
||||
|
||||
SD_API bool convert(const char* input_path,
|
||||
const char* vae_path,
|
||||
|
|
@ -255,14 +289,12 @@ SD_API bool convert(const char* input_path,
|
|||
enum sd_type_t output_type,
|
||||
const char* tensor_type_rules);
|
||||
|
||||
SD_API uint8_t* preprocess_canny(uint8_t* img,
|
||||
int width,
|
||||
int height,
|
||||
float high_threshold,
|
||||
float low_threshold,
|
||||
float weak,
|
||||
float strong,
|
||||
bool inverse);
|
||||
SD_API bool preprocess_canny(sd_image_t image,
|
||||
float high_threshold,
|
||||
float low_threshold,
|
||||
float weak,
|
||||
float strong,
|
||||
bool inverse);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,7 +124,10 @@ protected:
|
|||
return;
|
||||
}
|
||||
std::string piece = item[0];
|
||||
float score = item[1];
|
||||
if (piece.empty()) {
|
||||
piece = "<empty_token>";
|
||||
}
|
||||
float score = item[1];
|
||||
piece_score_pairs.emplace_back(piece, score);
|
||||
}
|
||||
}
|
||||
|
|
@ -147,6 +150,7 @@ protected:
|
|||
std::vector<const char*> key(pieces->size());
|
||||
std::vector<int> value(pieces->size());
|
||||
for (size_t i = 0; i < pieces->size(); ++i) {
|
||||
// LOG_DEBUG("%s %d", (*pieces)[i].first.c_str(), (*pieces)[i].second);
|
||||
key[i] = (*pieces)[i].first.data(); // sorted piece.
|
||||
value[i] = (*pieces)[i].second; // vocab_id
|
||||
}
|
||||
|
|
@ -335,9 +339,9 @@ protected:
|
|||
}
|
||||
|
||||
public:
|
||||
explicit T5UniGramTokenizer(const std::string& json_str = "") {
|
||||
if (json_str.size() != 0) {
|
||||
InitializePieces(json_str);
|
||||
explicit T5UniGramTokenizer(bool is_umt5 = false) {
|
||||
if (is_umt5) {
|
||||
InitializePieces(ModelLoader::load_umt5_tokenizer_json());
|
||||
} else {
|
||||
InitializePieces(ModelLoader::load_t5_tokenizer_json());
|
||||
}
|
||||
|
|
@ -574,6 +578,7 @@ public:
|
|||
|
||||
// x: [N, n_token, model_dim]
|
||||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* mask = NULL,
|
||||
|
|
@ -604,7 +609,7 @@ public:
|
|||
|
||||
k = ggml_scale_inplace(ctx, k, sqrt(d_head));
|
||||
|
||||
x = ggml_nn_attention_ext(ctx, q, k, v, num_heads, mask); // [N, n_token, d_head * n_head]
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, num_heads, mask); // [N, n_token, d_head * n_head]
|
||||
|
||||
x = out_proj->forward(ctx, x); // [N, n_token, model_dim]
|
||||
return {x, past_bias};
|
||||
|
|
@ -623,6 +628,7 @@ public:
|
|||
}
|
||||
|
||||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* mask = NULL,
|
||||
|
|
@ -632,7 +638,7 @@ public:
|
|||
auto layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["layer_norm"]);
|
||||
|
||||
auto normed_hidden_state = layer_norm->forward(ctx, x);
|
||||
auto ret = SelfAttention->forward(ctx, normed_hidden_state, past_bias, mask, relative_position_bucket);
|
||||
auto ret = SelfAttention->forward(ctx, backend, normed_hidden_state, past_bias, mask, relative_position_bucket);
|
||||
auto output = ret.first;
|
||||
past_bias = ret.second;
|
||||
|
||||
|
|
@ -649,6 +655,7 @@ public:
|
|||
}
|
||||
|
||||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* mask = NULL,
|
||||
|
|
@ -657,7 +664,7 @@ public:
|
|||
auto layer_0 = std::dynamic_pointer_cast<T5LayerSelfAttention>(blocks["layer.0"]);
|
||||
auto layer_1 = std::dynamic_pointer_cast<T5LayerFF>(blocks["layer.1"]);
|
||||
|
||||
auto ret = layer_0->forward(ctx, x, past_bias, mask, relative_position_bucket);
|
||||
auto ret = layer_0->forward(ctx, backend, x, past_bias, mask, relative_position_bucket);
|
||||
x = ret.first;
|
||||
past_bias = ret.second;
|
||||
x = layer_1->forward(ctx, x);
|
||||
|
|
@ -673,16 +680,18 @@ public:
|
|||
int64_t model_dim,
|
||||
int64_t inner_dim,
|
||||
int64_t ff_dim,
|
||||
int64_t num_heads)
|
||||
int64_t num_heads,
|
||||
bool relative_attention = true)
|
||||
: num_layers(num_layers) {
|
||||
for (int i = 0; i < num_layers; i++) {
|
||||
blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, i == 0));
|
||||
blocks["block." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new T5Block(model_dim, inner_dim, ff_dim, num_heads, (!relative_attention || i == 0)));
|
||||
}
|
||||
|
||||
blocks["final_layer_norm"] = std::shared_ptr<GGMLBlock>(new T5LayerNorm(model_dim));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* attention_mask = NULL,
|
||||
|
|
@ -691,7 +700,7 @@ public:
|
|||
for (int i = 0; i < num_layers; i++) {
|
||||
auto block = std::dynamic_pointer_cast<T5Block>(blocks["block." + std::to_string(i)]);
|
||||
|
||||
auto ret = block->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
|
||||
auto ret = block->forward(ctx, backend, x, past_bias, attention_mask, relative_position_bucket);
|
||||
x = ret.first;
|
||||
past_bias = ret.second;
|
||||
}
|
||||
|
|
@ -703,18 +712,34 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
struct T5Params {
|
||||
int64_t num_layers = 24;
|
||||
int64_t model_dim = 4096;
|
||||
int64_t ff_dim = 10240;
|
||||
int64_t num_heads = 64;
|
||||
int64_t vocab_size = 32128;
|
||||
bool relative_attention = true;
|
||||
};
|
||||
|
||||
struct T5 : public GGMLBlock {
|
||||
T5Params params;
|
||||
|
||||
public:
|
||||
T5(int64_t num_layers,
|
||||
int64_t model_dim,
|
||||
int64_t ff_dim,
|
||||
int64_t num_heads,
|
||||
int64_t vocab_size) {
|
||||
blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
|
||||
blocks["shared"] = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
|
||||
T5() {}
|
||||
T5(T5Params params)
|
||||
: params(params) {
|
||||
blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(params.num_layers,
|
||||
params.model_dim,
|
||||
params.model_dim,
|
||||
params.ff_dim,
|
||||
params.num_heads,
|
||||
params.relative_attention));
|
||||
blocks["shared"] = std::shared_ptr<GGMLBlock>(new Embedding(params.vocab_size,
|
||||
params.model_dim));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* attention_mask = NULL,
|
||||
|
|
@ -725,24 +750,27 @@ public:
|
|||
auto encoder = std::dynamic_pointer_cast<T5Stack>(blocks["encoder"]);
|
||||
|
||||
auto x = shared->forward(ctx, input_ids);
|
||||
x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
|
||||
x = encoder->forward(ctx, backend, x, past_bias, attention_mask, relative_position_bucket);
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
struct T5Runner : public GGMLRunner {
|
||||
T5Params params;
|
||||
T5 model;
|
||||
std::vector<int> relative_position_bucket_vec;
|
||||
|
||||
T5Runner(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types,
|
||||
const std::string prefix,
|
||||
int64_t num_layers = 24,
|
||||
int64_t model_dim = 4096,
|
||||
int64_t ff_dim = 10240,
|
||||
int64_t num_heads = 64,
|
||||
int64_t vocab_size = 32128)
|
||||
: GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size) {
|
||||
bool is_umt5 = false)
|
||||
: GGMLRunner(backend, offload_params_to_cpu) {
|
||||
if (is_umt5) {
|
||||
params.vocab_size = 256384;
|
||||
params.relative_attention = false;
|
||||
}
|
||||
model = T5(params);
|
||||
model.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -755,13 +783,14 @@ struct T5Runner : public GGMLRunner {
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* relative_position_bucket,
|
||||
struct ggml_tensor* attention_mask = NULL) {
|
||||
size_t N = input_ids->ne[1];
|
||||
size_t n_token = input_ids->ne[0];
|
||||
|
||||
auto hidden_states = model.forward(ctx, input_ids, NULL, attention_mask, relative_position_bucket); // [N, n_token, model_dim]
|
||||
auto hidden_states = model.forward(ctx, backend, input_ids, NULL, attention_mask, relative_position_bucket); // [N, n_token, model_dim]
|
||||
return hidden_states;
|
||||
}
|
||||
|
||||
|
|
@ -769,7 +798,8 @@ struct T5Runner : public GGMLRunner {
|
|||
struct ggml_tensor* attention_mask = NULL) {
|
||||
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
||||
|
||||
input_ids = to_backend(input_ids);
|
||||
input_ids = to_backend(input_ids);
|
||||
attention_mask = to_backend(attention_mask);
|
||||
|
||||
relative_position_bucket_vec = compute_relative_position_bucket(input_ids->ne[0], input_ids->ne[0]);
|
||||
|
||||
|
|
@ -786,7 +816,7 @@ struct T5Runner : public GGMLRunner {
|
|||
input_ids->ne[0]);
|
||||
set_backend_tensor_data(relative_position_bucket, relative_position_bucket_vec.data());
|
||||
|
||||
struct ggml_tensor* hidden_states = forward(compute_ctx, input_ids, relative_position_bucket, attention_mask);
|
||||
struct ggml_tensor* hidden_states = forward(compute_ctx, runtime_backend, input_ids, relative_position_bucket, attention_mask);
|
||||
|
||||
ggml_build_forward_expand(gf, hidden_states);
|
||||
|
||||
|
|
@ -877,14 +907,11 @@ struct T5Embedder {
|
|||
T5Runner model;
|
||||
|
||||
T5Embedder(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types = {},
|
||||
const std::string prefix = "",
|
||||
int64_t num_layers = 24,
|
||||
int64_t model_dim = 4096,
|
||||
int64_t ff_dim = 10240,
|
||||
int64_t num_heads = 64,
|
||||
int64_t vocab_size = 32128)
|
||||
: model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size) {
|
||||
bool is_umt5 = false)
|
||||
: model(backend, offload_params_to_cpu, tensor_types, prefix, is_umt5), tokenizer(is_umt5) {
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
||||
|
|
@ -946,25 +973,22 @@ struct T5Embedder {
|
|||
GGML_ASSERT(work_ctx != NULL);
|
||||
|
||||
{
|
||||
// cpu f16: pass
|
||||
// cpu f32: pass
|
||||
// cuda f16: nan
|
||||
// cuda f32: pass
|
||||
// cuda q8_0: nan
|
||||
// TODO: fix cuda nan
|
||||
std::string text("a lovely cat");
|
||||
auto tokens_and_weights = tokenize(text, 77, true);
|
||||
// std::string text("一只可爱的猫"); // umt5 chinease test
|
||||
auto tokens_and_weights = tokenize(text, 512, true);
|
||||
std::vector<int>& tokens = std::get<0>(tokens_and_weights);
|
||||
std::vector<float>& weights = std::get<1>(tokens_and_weights);
|
||||
std::vector<float>& masks = std::get<2>(tokens_and_weights);
|
||||
for (auto token : tokens) {
|
||||
printf("%d ", token);
|
||||
}
|
||||
printf("\n");
|
||||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
|
||||
auto attention_mask = vector_to_ggml_tensor(work_ctx, masks);
|
||||
struct ggml_tensor* out = NULL;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
model.compute(8, input_ids, NULL, &out, work_ctx);
|
||||
model.compute(8, input_ids, attention_mask, &out, work_ctx);
|
||||
int t1 = ggml_time_ms();
|
||||
|
||||
print_ggml_tensor(out);
|
||||
|
|
@ -973,32 +997,43 @@ struct T5Embedder {
|
|||
}
|
||||
|
||||
static void load_from_file_and_test(const std::string& file_path) {
|
||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F32;
|
||||
std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend));
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
// cpu f16: pass
|
||||
// cpu f32: pass
|
||||
// cuda f16: pass
|
||||
// cuda f32: pass
|
||||
// cuda q8_0: pass
|
||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
|
||||
t5->alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
t5->get_param_tensors(tensors, "");
|
||||
|
||||
ModelLoader model_loader;
|
||||
if (!model_loader.init_from_file(file_path)) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(tensors, backend);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_INFO("t5 model loaded");
|
||||
ModelLoader model_loader;
|
||||
if (!model_loader.init_from_file(file_path)) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
auto tensor_types = model_loader.tensor_storages_types;
|
||||
for (auto& item : tensor_types) {
|
||||
// LOG_DEBUG("%s %u", item.first.c_str(), item.second);
|
||||
if (ends_with(item.first, "weight")) {
|
||||
item.second = model_data_type;
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend, false, tensor_types, "", true));
|
||||
|
||||
t5->alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
t5->get_param_tensors(tensors, "");
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_INFO("t5 model loaded");
|
||||
t5->test();
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -196,13 +196,14 @@ struct TinyAutoEncoder : public GGMLRunner {
|
|||
bool decode_only = false;
|
||||
|
||||
TinyAutoEncoder(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types,
|
||||
const std::string prefix,
|
||||
bool decoder_only = true,
|
||||
SDVersion version = VERSION_SD1)
|
||||
: decode_only(decoder_only),
|
||||
taesd(decoder_only, version),
|
||||
GGMLRunner(backend) {
|
||||
GGMLRunner(backend, offload_params_to_cpu) {
|
||||
taesd.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -221,7 +222,7 @@ struct TinyAutoEncoder : public GGMLRunner {
|
|||
return "taesd";
|
||||
}
|
||||
|
||||
bool load_from_file(const std::string& file_path) {
|
||||
bool load_from_file(const std::string& file_path, int n_threads) {
|
||||
LOG_INFO("loading taesd from '%s', decode_only = %s", file_path.c_str(), decode_only ? "true" : "false");
|
||||
alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> taesd_tensors;
|
||||
|
|
@ -237,7 +238,7 @@ struct TinyAutoEncoder : public GGMLRunner {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(taesd_tensors, backend, ignore_tensors);
|
||||
bool success = model_loader.load_tensors(taesd_tensors, ignore_tensors, n_threads);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tae tensors from model loader failed");
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ public:
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context,
|
||||
int timesteps) {
|
||||
|
|
@ -127,7 +128,7 @@ public:
|
|||
auto block = std::dynamic_pointer_cast<BasicTransformerBlock>(blocks[transformer_name]);
|
||||
auto mix_block = std::dynamic_pointer_cast<BasicTransformerBlock>(blocks[time_stack_name]);
|
||||
|
||||
x = block->forward(ctx, x, spatial_context); // [N, h * w, inner_dim]
|
||||
x = block->forward(ctx, backend, x, spatial_context); // [N, h * w, inner_dim]
|
||||
|
||||
// in_channels == inner_dim
|
||||
auto x_mix = x;
|
||||
|
|
@ -143,7 +144,7 @@ public:
|
|||
x_mix = ggml_cont(ctx, ggml_permute(ctx, x_mix, 0, 2, 1, 3)); // b t s c -> b s t c
|
||||
x_mix = ggml_reshape_3d(ctx, x_mix, C, T, S * B); // b s t c -> (b s) t c
|
||||
|
||||
x_mix = mix_block->forward(ctx, x_mix, time_context); // [B * h * w, T, inner_dim]
|
||||
x_mix = mix_block->forward(ctx, backend, x_mix, time_context); // [B * h * w, T, inner_dim]
|
||||
|
||||
x_mix = ggml_reshape_4d(ctx, x_mix, C, T, S, B); // (b s) t c -> b s t c
|
||||
x_mix = ggml_cont(ctx, ggml_permute(ctx, x_mix, 0, 2, 1, 3)); // b s t c -> b t s c
|
||||
|
|
@ -363,21 +364,23 @@ public:
|
|||
|
||||
struct ggml_tensor* attention_layer_forward(std::string name,
|
||||
struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context,
|
||||
int timesteps) {
|
||||
if (version == VERSION_SVD) {
|
||||
auto block = std::dynamic_pointer_cast<SpatialVideoTransformer>(blocks[name]);
|
||||
|
||||
return block->forward(ctx, x, context, timesteps);
|
||||
return block->forward(ctx, backend, x, context, timesteps);
|
||||
} else {
|
||||
auto block = std::dynamic_pointer_cast<SpatialTransformer>(blocks[name]);
|
||||
|
||||
return block->forward(ctx, x, context);
|
||||
return block->forward(ctx, backend, x, context);
|
||||
}
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
|
|
@ -456,7 +459,7 @@ public:
|
|||
h = resblock_forward(name, ctx, h, emb, num_video_frames); // [N, mult*model_channels, h, w]
|
||||
if (std::find(attention_resolutions.begin(), attention_resolutions.end(), ds) != attention_resolutions.end()) {
|
||||
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1";
|
||||
h = attention_layer_forward(name, ctx, h, context, num_video_frames); // [N, mult*model_channels, h, w]
|
||||
h = attention_layer_forward(name, ctx, backend, h, context, num_video_frames); // [N, mult*model_channels, h, w]
|
||||
}
|
||||
hs.push_back(h);
|
||||
}
|
||||
|
|
@ -474,9 +477,9 @@ public:
|
|||
// [N, 4*model_channels, h/8, w/8]
|
||||
|
||||
// middle_block
|
||||
h = resblock_forward("middle_block.0", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||
h = attention_layer_forward("middle_block.1", ctx, h, context, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||
h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||
h = resblock_forward("middle_block.0", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||
h = attention_layer_forward("middle_block.1", ctx, backend, h, context, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||
h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||
|
||||
if (controls.size() > 0) {
|
||||
auto cs = ggml_scale_inplace(ctx, controls[controls.size() - 1], control_strength);
|
||||
|
|
@ -507,7 +510,7 @@ public:
|
|||
if (std::find(attention_resolutions.begin(), attention_resolutions.end(), ds) != attention_resolutions.end()) {
|
||||
std::string name = "output_blocks." + std::to_string(output_block_idx) + ".1";
|
||||
|
||||
h = attention_layer_forward(name, ctx, h, context, num_video_frames);
|
||||
h = attention_layer_forward(name, ctx, backend, h, context, num_video_frames);
|
||||
|
||||
up_sample_idx++;
|
||||
}
|
||||
|
|
@ -538,11 +541,12 @@ struct UNetModelRunner : public GGMLRunner {
|
|||
UnetModelBlock unet;
|
||||
|
||||
UNetModelRunner(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types,
|
||||
const std::string prefix,
|
||||
SDVersion version = VERSION_SD1,
|
||||
bool flash_attn = false)
|
||||
: GGMLRunner(backend), unet(version, tensor_types, flash_attn) {
|
||||
: GGMLRunner(backend, offload_params_to_cpu), unet(version, tensor_types, flash_attn) {
|
||||
unet.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -591,6 +595,7 @@ struct UNetModelRunner : public GGMLRunner {
|
|||
}
|
||||
|
||||
struct ggml_tensor* out = unet.forward(compute_ctx,
|
||||
runtime_backend,
|
||||
x,
|
||||
timesteps,
|
||||
context,
|
||||
|
|
|
|||
|
|
@ -17,14 +17,16 @@ struct UpscalerGGML {
|
|||
direct(direct) {
|
||||
}
|
||||
|
||||
bool load_from_file(const std::string& esrgan_path) {
|
||||
bool load_from_file(const std::string& esrgan_path,
|
||||
bool offload_params_to_cpu,
|
||||
int n_threads) {
|
||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||
#ifdef SD_USE_CUDA
|
||||
LOG_DEBUG("Using CUDA backend");
|
||||
backend = ggml_backend_cuda_init(0);
|
||||
#endif
|
||||
#ifdef SD_USE_METAL
|
||||
LOG_DEBUG("Using Metal backend");
|
||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||
backend = ggml_backend_metal_init();
|
||||
#endif
|
||||
#ifdef SD_USE_VULKAN
|
||||
|
|
@ -49,11 +51,11 @@ struct UpscalerGGML {
|
|||
backend = ggml_backend_cpu_init();
|
||||
}
|
||||
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
||||
esrgan_upscaler = std::make_shared<ESRGAN>(backend, model_loader.tensor_storages_types);
|
||||
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.tensor_storages_types);
|
||||
if (direct) {
|
||||
esrgan_upscaler->enable_conv2d_direct();
|
||||
}
|
||||
if (!esrgan_upscaler->load_from_file(esrgan_path)) {
|
||||
if (!esrgan_upscaler->load_from_file(esrgan_path, n_threads)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
@ -68,8 +70,7 @@ struct UpscalerGGML {
|
|||
input_image.width, input_image.height, output_width, output_height);
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = output_width * output_height * 3 * sizeof(float) * 2;
|
||||
params.mem_size += 2 * ggml_tensor_overhead();
|
||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G
|
||||
params.mem_buffer = NULL;
|
||||
params.no_alloc = false;
|
||||
|
||||
|
|
@ -79,9 +80,9 @@ struct UpscalerGGML {
|
|||
LOG_ERROR("ggml_init() failed");
|
||||
return upscaled_image;
|
||||
}
|
||||
LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
||||
// LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
||||
ggml_tensor* input_image_tensor = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, input_image.width, input_image.height, 3, 1);
|
||||
sd_image_to_tensor(input_image.data, input_image_tensor);
|
||||
sd_image_to_tensor(input_image, input_image_tensor);
|
||||
|
||||
ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1);
|
||||
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
||||
|
|
@ -110,8 +111,9 @@ struct upscaler_ctx_t {
|
|||
};
|
||||
|
||||
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
||||
int n_threads,
|
||||
bool direct = false) {
|
||||
bool offload_params_to_cpu,
|
||||
bool direct,
|
||||
int n_threads) {
|
||||
upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
|
||||
if (upscaler_ctx == NULL) {
|
||||
return NULL;
|
||||
|
|
@ -123,7 +125,7 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (!upscaler_ctx->upscaler->load_from_file(esrgan_path)) {
|
||||
if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu, n_threads)) {
|
||||
delete upscaler_ctx->upscaler;
|
||||
upscaler_ctx->upscaler = NULL;
|
||||
free(upscaler_ctx);
|
||||
|
|
|
|||
|
|
@ -62,6 +62,17 @@ void replace_all_chars(std::string& str, char target, char replacement) {
|
|||
}
|
||||
}
|
||||
|
||||
int round_up_to(int value, int base) {
|
||||
if (base <= 0) {
|
||||
return value;
|
||||
}
|
||||
if (value % base == 0) {
|
||||
return value;
|
||||
} else {
|
||||
return ((value / base) + 1) * base;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _WIN32 // code for windows
|
||||
#include <windows.h>
|
||||
|
||||
|
|
@ -89,56 +100,6 @@ std::string get_full_path(const std::string& dir, const std::string& filename) {
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> get_files_from_dir(const std::string& dir) {
|
||||
std::vector<std::string> files;
|
||||
|
||||
WIN32_FIND_DATA findFileData;
|
||||
HANDLE hFind;
|
||||
|
||||
char currentDirectory[MAX_PATH];
|
||||
GetCurrentDirectory(MAX_PATH, currentDirectory);
|
||||
|
||||
char directoryPath[MAX_PATH]; // this is absolute path
|
||||
sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());
|
||||
|
||||
// Find the first file in the directory
|
||||
hFind = FindFirstFile(directoryPath, &findFileData);
|
||||
bool isAbsolutePath = false;
|
||||
// Check if the directory was found
|
||||
if (hFind == INVALID_HANDLE_VALUE) {
|
||||
printf("Unable to find directory. Try with original path \n");
|
||||
|
||||
char directoryPathAbsolute[MAX_PATH];
|
||||
sprintf(directoryPathAbsolute, "%s*", dir.c_str());
|
||||
|
||||
hFind = FindFirstFile(directoryPathAbsolute, &findFileData);
|
||||
isAbsolutePath = true;
|
||||
if (hFind == INVALID_HANDLE_VALUE) {
|
||||
printf("Absolute path was also wrong.\n");
|
||||
return files;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop through all files in the directory
|
||||
do {
|
||||
// Check if the found file is a regular file (not a directory)
|
||||
if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
|
||||
if (isAbsolutePath) {
|
||||
files.push_back(dir + "\\" + std::string(findFileData.cFileName));
|
||||
} else {
|
||||
files.push_back(std::string(currentDirectory) + "\\" + dir + "\\" + std::string(findFileData.cFileName));
|
||||
}
|
||||
}
|
||||
} while (FindNextFile(hFind, &findFileData) != 0);
|
||||
|
||||
// Close the handle
|
||||
FindClose(hFind);
|
||||
|
||||
sort(files.begin(), files.end());
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
#else // Unix
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
|
|
@ -173,27 +134,6 @@ std::string get_full_path(const std::string& dir, const std::string& filename) {
|
|||
return "";
|
||||
}
|
||||
|
||||
std::vector<std::string> get_files_from_dir(const std::string& dir) {
|
||||
std::vector<std::string> files;
|
||||
|
||||
DIR* dp = opendir(dir.c_str());
|
||||
|
||||
if (dp != nullptr) {
|
||||
struct dirent* entry;
|
||||
|
||||
while ((entry = readdir(dp)) != nullptr) {
|
||||
std::string fname = dir + "/" + entry->d_name;
|
||||
if (!is_directory(fname))
|
||||
files.push_back(fname);
|
||||
}
|
||||
closedir(dp);
|
||||
}
|
||||
|
||||
sort(files.begin(), files.end());
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// get_num_physical_cores is copy from
|
||||
|
|
@ -280,7 +220,7 @@ std::string path_join(const std::string& p1, const std::string& p2) {
|
|||
return p1 + "/" + p2;
|
||||
}
|
||||
|
||||
std::vector<std::string> splitString(const std::string& str, char delimiter) {
|
||||
std::vector<std::string> split_string(const std::string& str, char delimiter) {
|
||||
std::vector<std::string> result;
|
||||
size_t start = 0;
|
||||
size_t end = str.find(delimiter);
|
||||
|
|
@ -297,38 +237,6 @@ std::vector<std::string> splitString(const std::string& str, char delimiter) {
|
|||
return result;
|
||||
}
|
||||
|
||||
sd_image_t* preprocess_id_image(sd_image_t* img) {
|
||||
int shortest_edge = 224;
|
||||
int size = shortest_edge;
|
||||
sd_image_t* resized = NULL;
|
||||
uint32_t w = img->width;
|
||||
uint32_t h = img->height;
|
||||
uint32_t c = img->channel;
|
||||
|
||||
// 1. do resize using stb_resize functions
|
||||
|
||||
unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * 3 * size * size);
|
||||
if (!stbir_resize_uint8(img->data, w, h, 0,
|
||||
buf, size, size, 0,
|
||||
c)) {
|
||||
fprintf(stderr, "%s: resize operation failed \n ", __func__);
|
||||
return resized;
|
||||
}
|
||||
|
||||
// 2. do center crop (likely unnecessary due to step 1)
|
||||
|
||||
// 3. do rescale
|
||||
|
||||
// 4. do normalize
|
||||
|
||||
// 3 and 4 will need to be done in float format.
|
||||
|
||||
resized = new sd_image_t{(uint32_t)shortest_edge,
|
||||
(uint32_t)shortest_edge,
|
||||
3,
|
||||
buf};
|
||||
return resized;
|
||||
}
|
||||
|
||||
static int sdloglevel = 0; //-1 = hide all, 0 = normal, 1 = showall
|
||||
static bool sdquiet = false;
|
||||
|
|
@ -422,7 +330,10 @@ void log_printf(sd_log_level_t level, const char* file, int line, const char* fo
|
|||
if (written >= 0 && written < LOG_BUFFER_SIZE) {
|
||||
vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
|
||||
}
|
||||
strncat(log_buffer, "\n", LOG_BUFFER_SIZE - strlen(log_buffer));
|
||||
size_t len = strlen(log_buffer);
|
||||
if (log_buffer[len - 1] != '\n') {
|
||||
strncat(log_buffer, "\n", LOG_BUFFER_SIZE - len);
|
||||
}
|
||||
|
||||
if (sd_log_cb) {
|
||||
sd_log_cb(level, log_buffer, sd_log_cb_data);
|
||||
|
|
|
|||
|
|
@ -16,18 +16,15 @@ bool contains(const std::string& str, const std::string& substr);
|
|||
|
||||
void replace_all_chars(std::string& str, char target, char replacement);
|
||||
|
||||
int round_up_to(int value, int base);
|
||||
|
||||
bool file_exists(const std::string& filename);
|
||||
bool is_directory(const std::string& path);
|
||||
std::string get_full_path(const std::string& dir, const std::string& filename);
|
||||
|
||||
std::vector<std::string> get_files_from_dir(const std::string& dir);
|
||||
|
||||
std::u32string utf8_to_utf32(const std::string& utf8_str);
|
||||
std::string utf32_to_utf8(const std::u32string& utf32_str);
|
||||
std::u32string unicode_value_to_utf32(int unicode_value);
|
||||
|
||||
sd_image_t* preprocess_id_image(sd_image_t* img);
|
||||
|
||||
// std::string sd_basename(const std::string& path);
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -46,7 +43,7 @@ sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int
|
|||
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size);
|
||||
|
||||
std::string path_join(const std::string& p1, const std::string& p2);
|
||||
std::vector<std::string> splitString(const std::string& str, char delimiter);
|
||||
std::vector<std::string> split_string(const std::string& str, char delimiter);
|
||||
void pretty_progress(int step, int steps, float time);
|
||||
|
||||
void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
|
||||
|
|
|
|||
|
|
@ -520,17 +520,30 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
struct AutoEncoderKL : public GGMLRunner {
|
||||
struct VAE : public GGMLRunner {
|
||||
VAE(ggml_backend_t backend, bool offload_params_to_cpu)
|
||||
: GGMLRunner(backend, offload_params_to_cpu) {}
|
||||
virtual void compute(const int n_threads,
|
||||
struct ggml_tensor* z,
|
||||
bool decode_graph,
|
||||
struct ggml_tensor** output,
|
||||
struct ggml_context* output_ctx) = 0;
|
||||
virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) = 0;
|
||||
virtual void enable_conv2d_direct(){};
|
||||
};
|
||||
|
||||
struct AutoEncoderKL : public VAE {
|
||||
bool decode_only = true;
|
||||
AutoencodingEngine ae;
|
||||
|
||||
AutoEncoderKL(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
const String2GGMLType& tensor_types,
|
||||
const std::string prefix,
|
||||
bool decode_only = false,
|
||||
bool use_video_decoder = false,
|
||||
SDVersion version = VERSION_SD1)
|
||||
: decode_only(decode_only), ae(decode_only, use_video_decoder, version), GGMLRunner(backend) {
|
||||
: decode_only(decode_only), ae(decode_only, use_video_decoder, version), VAE(backend, offload_params_to_cpu) {
|
||||
ae.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -575,7 +588,7 @@ struct AutoEncoderKL : public GGMLRunner {
|
|||
};
|
||||
// ggml_set_f32(z, 0.5f);
|
||||
// print_ggml_tensor(z);
|
||||
GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
|
||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||
}
|
||||
|
||||
void test() {
|
||||
|
|
|
|||
762304
otherarch/sdcpp/vocab_umt5.hpp
Normal file
762304
otherarch/sdcpp/vocab_umt5.hpp
Normal file
File diff suppressed because it is too large
Load diff
2304
otherarch/sdcpp/wan.hpp
Normal file
2304
otherarch/sdcpp/wan.hpp
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue