mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-21 18:52:02 +00:00
sd: sync to master-336-917f7bf (#1810)
This commit is contained in:
parent
9b842edc9a
commit
2cab657c60
30 changed files with 1249 additions and 964 deletions
|
|
@ -1,10 +1,10 @@
|
|||
#ifndef __AVI_WRITER_H__
|
||||
#define __AVI_WRITER_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
|
|
@ -135,7 +135,7 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
|
|||
write_u32_le(f, 0); // Colors important
|
||||
|
||||
// 'movi' LIST (video frames)
|
||||
long movi_list_pos = ftell(f);
|
||||
// long movi_list_pos = ftell(f);
|
||||
fwrite("LIST", 4, 1, f);
|
||||
long movi_size_pos = ftell(f);
|
||||
write_u32_le(f, 0); // Placeholder for movi size
|
||||
|
|
@ -154,7 +154,7 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
|
|||
} jpeg_data;
|
||||
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
jpeg_data.buf = NULL;
|
||||
jpeg_data.buf = nullptr;
|
||||
jpeg_data.size = 0;
|
||||
|
||||
// Callback function to collect JPEG data into memory
|
||||
|
|
|
|||
|
|
@ -550,7 +550,7 @@ protected:
|
|||
int64_t num_positions;
|
||||
bool force_clip_f32;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type token_wtype = GGML_TYPE_F32;
|
||||
if (!force_clip_f32) {
|
||||
token_wtype = get_type(prefix + "token_embedding.weight", tensor_types, GGML_TYPE_F32);
|
||||
|
|
@ -587,7 +587,7 @@ public:
|
|||
|
||||
GGML_ASSERT(input_ids->ne[0] == position_embed_weight->ne[1]);
|
||||
input_ids = ggml_reshape_3d(ctx, input_ids, input_ids->ne[0], 1, input_ids->ne[1]);
|
||||
auto token_embedding = ggml_get_rows(ctx, custom_embed_weight != NULL ? custom_embed_weight : token_embed_weight, input_ids);
|
||||
auto token_embedding = ggml_get_rows(ctx, custom_embed_weight != nullptr ? custom_embed_weight : token_embed_weight, input_ids);
|
||||
token_embedding = ggml_reshape_3d(ctx, token_embedding, token_embedding->ne[0], token_embedding->ne[1], token_embedding->ne[3]);
|
||||
|
||||
// token_embedding + position_embedding
|
||||
|
|
@ -606,7 +606,7 @@ protected:
|
|||
int64_t image_size;
|
||||
int64_t num_patches;
|
||||
int64_t num_positions;
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type patch_wtype = GGML_TYPE_F16;
|
||||
enum ggml_type class_wtype = GGML_TYPE_F32;
|
||||
enum ggml_type position_wtype = GGML_TYPE_F32;
|
||||
|
|
@ -641,10 +641,10 @@ public:
|
|||
// concat(patch_embedding, class_embedding) + position_embedding
|
||||
struct ggml_tensor* patch_embedding;
|
||||
int64_t N = pixel_values->ne[3];
|
||||
patch_embedding = ggml_nn_conv_2d(ctx, pixel_values, patch_embed_weight, NULL, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
|
||||
patch_embedding = ggml_reshape_3d(ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
|
||||
patch_embedding = ggml_cont(ctx, ggml_permute(ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
|
||||
patch_embedding = ggml_reshape_4d(ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
|
||||
patch_embedding = ggml_nn_conv_2d(ctx, pixel_values, patch_embed_weight, nullptr, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
|
||||
patch_embedding = ggml_reshape_3d(ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
|
||||
patch_embedding = ggml_cont(ctx, ggml_permute(ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
|
||||
patch_embedding = ggml_reshape_4d(ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
|
||||
|
||||
struct ggml_tensor* class_embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, N);
|
||||
class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding); // [N, embed_dim]
|
||||
|
|
@ -669,7 +669,7 @@ enum CLIPVersion {
|
|||
|
||||
class CLIPTextModel : public GGMLBlock {
|
||||
protected:
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
if (version == OPEN_CLIP_VIT_BIGG_14) {
|
||||
enum ggml_type wtype = GGML_TYPE_F32;
|
||||
params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
|
||||
|
|
@ -735,8 +735,8 @@ public:
|
|||
if (return_pooled) {
|
||||
auto text_projection = params["text_projection"];
|
||||
ggml_tensor* pooled = ggml_view_1d(ctx, x, hidden_size, x->nb[1] * max_token_idx);
|
||||
if (text_projection != NULL) {
|
||||
pooled = ggml_nn_linear(ctx, pooled, text_projection, NULL);
|
||||
if (text_projection != nullptr) {
|
||||
pooled = ggml_nn_linear(ctx, pooled, text_projection, nullptr);
|
||||
} else {
|
||||
LOG_DEBUG("identity projection");
|
||||
}
|
||||
|
|
@ -814,7 +814,7 @@ protected:
|
|||
int64_t out_features;
|
||||
bool transpose_weight;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
|
||||
if (transpose_weight) {
|
||||
params["weight"] = ggml_new_tensor_2d(ctx, wtype, out_features, in_features);
|
||||
|
|
@ -831,12 +831,12 @@ public:
|
|||
out_features(out_features),
|
||||
transpose_weight(transpose_weight) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
if (transpose_weight) {
|
||||
w = ggml_cont(ctx, ggml_transpose(ctx, w));
|
||||
}
|
||||
return ggml_nn_linear(ctx, x, w, NULL);
|
||||
return ggml_nn_linear(ctx, x, w, nullptr);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -894,7 +894,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
model.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "clip";
|
||||
}
|
||||
|
||||
|
|
@ -921,7 +921,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
|
||||
struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids,
|
||||
int num_custom_embeddings = 0,
|
||||
void* custom_embeddings_data = NULL,
|
||||
void* custom_embeddings_data = nullptr,
|
||||
size_t max_token_idx = 0,
|
||||
bool return_pooled = false,
|
||||
int clip_skip = -1) {
|
||||
|
|
@ -929,9 +929,9 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
|
||||
input_ids = to_backend(input_ids);
|
||||
|
||||
struct ggml_tensor* embeddings = NULL;
|
||||
struct ggml_tensor* embeddings = nullptr;
|
||||
|
||||
if (num_custom_embeddings > 0 && custom_embeddings_data != NULL) {
|
||||
if (num_custom_embeddings > 0 && custom_embeddings_data != nullptr) {
|
||||
auto token_embed_weight = model.get_token_embed_weight();
|
||||
auto custom_embeddings = ggml_new_tensor_2d(compute_ctx,
|
||||
token_embed_weight->type,
|
||||
|
|
@ -958,7 +958,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||
bool return_pooled,
|
||||
int clip_skip,
|
||||
ggml_tensor** output,
|
||||
ggml_context* output_ctx = NULL) {
|
||||
ggml_context* output_ctx = nullptr) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* emb = NULL) {
|
||||
virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* emb = nullptr) {
|
||||
// For dims==3, we reduce dimension from 5d to 4d by merging h and w, in order not to change ggml
|
||||
// [N, c, t, h, w] => [N, c, t, h * w]
|
||||
// x: [N, channels, h, w] if dims == 2 else [N, channels, t, h, w]
|
||||
|
|
@ -131,7 +131,7 @@ public:
|
|||
auto out_layers_0 = std::dynamic_pointer_cast<GroupNorm32>(blocks["out_layers.0"]);
|
||||
auto out_layers_3 = std::dynamic_pointer_cast<UnaryBlock>(blocks["out_layers.3"]);
|
||||
|
||||
if (emb == NULL) {
|
||||
if (emb == nullptr) {
|
||||
GGML_ASSERT(skip_t_emb);
|
||||
}
|
||||
|
||||
|
|
@ -182,7 +182,7 @@ protected:
|
|||
int64_t dim_in;
|
||||
int64_t dim_out;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") override {
|
||||
enum ggml_type wtype = get_type(prefix + "proj.weight", tensor_types, GGML_TYPE_F32);
|
||||
enum ggml_type bias_wtype = GGML_TYPE_F32;
|
||||
params["proj.weight"] = ggml_new_tensor_2d(ctx, wtype, dim_in, dim_out * 2);
|
||||
|
|
@ -193,7 +193,7 @@ public:
|
|||
GEGLU(int64_t dim_in, int64_t dim_out)
|
||||
: dim_in(dim_in), dim_out(dim_out) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [ne3, ne2, ne1, dim_in]
|
||||
// return: [ne3, ne2, ne1, dim_out]
|
||||
struct ggml_tensor* w = params["proj.weight"];
|
||||
|
|
@ -222,7 +222,7 @@ public:
|
|||
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out, bias));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [ne3, ne2, ne1, dim_in]
|
||||
// return: [ne3, ne2, ne1, dim_out]
|
||||
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
|
||||
|
|
@ -325,7 +325,7 @@ public:
|
|||
auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim]
|
||||
auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim]
|
||||
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, NULL, false, false, flash_attn); // [N, n_token, inner_dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, nullptr, false, false, flash_attn); // [N, n_token, inner_dim]
|
||||
|
||||
x = to_out_0->forward(ctx, x); // [N, n_token, query_dim]
|
||||
return x;
|
||||
|
|
@ -483,7 +483,7 @@ public:
|
|||
|
||||
class AlphaBlender : public GGMLBlock {
|
||||
protected:
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") override {
|
||||
// Get the type of the "mix_factor" tensor from the input tensors map with the specified prefix
|
||||
enum ggml_type wtype = GGML_TYPE_F32;
|
||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@
|
|||
#include "t5.hpp"
|
||||
|
||||
struct SDCondition {
|
||||
struct ggml_tensor* c_crossattn = NULL; // aka context
|
||||
struct ggml_tensor* c_vector = NULL; // aka y
|
||||
struct ggml_tensor* c_concat = NULL;
|
||||
struct ggml_tensor* c_crossattn = nullptr; // aka context
|
||||
struct ggml_tensor* c_vector = nullptr; // aka y
|
||||
struct ggml_tensor* c_concat = nullptr;
|
||||
|
||||
SDCondition() = default;
|
||||
SDCondition(struct ggml_tensor* c_crossattn, struct ggml_tensor* c_vector, struct ggml_tensor* c_concat)
|
||||
|
|
@ -79,28 +79,28 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
}
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
text_model->get_param_tensors(tensors, "cond_stage_model.transformer.text_model");
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->get_param_tensors(tensors, "cond_stage_model.1.transformer.text_model");
|
||||
}
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
text_model->alloc_params_buffer();
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->alloc_params_buffer();
|
||||
}
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
text_model->free_params_buffer();
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->free_params_buffer();
|
||||
}
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
size_t buffer_size = text_model->get_params_buffer_size();
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
buffer_size += text_model2->get_params_buffer_size();
|
||||
|
|
@ -121,11 +121,11 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
}
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = 100 * 1024 * 1024; // max for custom embeddings 100 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* embd_ctx = ggml_init(params);
|
||||
struct ggml_tensor* embd = NULL;
|
||||
struct ggml_tensor* embd2 = NULL;
|
||||
struct ggml_tensor* embd = nullptr;
|
||||
struct ggml_tensor* embd2 = nullptr;
|
||||
auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) {
|
||||
if (tensor_storage.ne[0] != text_model->model.hidden_size) {
|
||||
if (text_model2) {
|
||||
|
|
@ -404,11 +404,11 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
int adm_in_channels = -1,
|
||||
bool zero_out_masked = false) {
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, hidden_size]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, hidden_size] or [n_token, hidden_size + hidden_size2]
|
||||
struct ggml_tensor* chunk_hidden_states1 = NULL; // [n_token, hidden_size]
|
||||
struct ggml_tensor* chunk_hidden_states2 = NULL; // [n_token, hidden_size2]
|
||||
struct ggml_tensor* pooled = NULL;
|
||||
struct ggml_tensor* hidden_states = nullptr; // [N, n_token, hidden_size]
|
||||
struct ggml_tensor* chunk_hidden_states = nullptr; // [n_token, hidden_size] or [n_token, hidden_size + hidden_size2]
|
||||
struct ggml_tensor* chunk_hidden_states1 = nullptr; // [n_token, hidden_size]
|
||||
struct ggml_tensor* chunk_hidden_states2 = nullptr; // [n_token, hidden_size2]
|
||||
struct ggml_tensor* pooled = nullptr;
|
||||
std::vector<float> hidden_states_vec;
|
||||
|
||||
if (clip_skip <= 0) {
|
||||
|
|
@ -424,7 +424,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
weights.begin() + (chunk_idx + 1) * chunk_len);
|
||||
|
||||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, chunk_tokens);
|
||||
struct ggml_tensor* input_ids2 = NULL;
|
||||
struct ggml_tensor* input_ids2 = nullptr;
|
||||
size_t max_token_idx = 0;
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
auto it = std::find(chunk_tokens.begin(), chunk_tokens.end(), tokenizer.EOS_TOKEN_ID);
|
||||
|
|
@ -512,7 +512,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
chunk_hidden_states->ne[0],
|
||||
ggml_nelements(hidden_states) / chunk_hidden_states->ne[0]);
|
||||
|
||||
ggml_tensor* vec = NULL;
|
||||
ggml_tensor* vec = nullptr;
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
int out_dim = 256;
|
||||
vec = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, adm_in_channels);
|
||||
|
|
@ -549,13 +549,13 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
GGML_ASSERT(offset == ggml_nbytes(vec));
|
||||
}
|
||||
// print_ggml_tensor(result);
|
||||
return SDCondition(hidden_states, vec, NULL);
|
||||
return {hidden_states, vec, nullptr};
|
||||
}
|
||||
|
||||
std::tuple<SDCondition, std::vector<bool>>
|
||||
get_learned_condition_with_trigger(ggml_context* work_ctx,
|
||||
int n_threads,
|
||||
const ConditionerParams& conditioner_params) {
|
||||
const ConditionerParams& conditioner_params) override {
|
||||
auto image_tokens = convert_token_to_id(trigger_word);
|
||||
// if(image_tokens.size() == 1){
|
||||
// printf(" image token id is: %d \n", image_tokens[0]);
|
||||
|
|
@ -589,7 +589,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
}
|
||||
|
||||
std::string remove_trigger_from_prompt(ggml_context* work_ctx,
|
||||
const std::string& prompt) {
|
||||
const std::string& prompt) override {
|
||||
auto image_tokens = convert_token_to_id(trigger_word);
|
||||
GGML_ASSERT(image_tokens.size() == 1);
|
||||
auto tokens_and_weights = tokenize(prompt, false);
|
||||
|
|
@ -602,7 +602,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
|
||||
SDCondition get_learned_condition(ggml_context* work_ctx,
|
||||
int n_threads,
|
||||
const ConditionerParams& conditioner_params) {
|
||||
const ConditionerParams& conditioner_params) override {
|
||||
auto tokens_and_weights = tokenize(conditioner_params.text, true);
|
||||
std::vector<int>& tokens = tokens_and_weights.first;
|
||||
std::vector<float>& weights = tokens_and_weights.second;
|
||||
|
|
@ -628,7 +628,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
|
|||
vision_model.init(params_ctx, tensor_types, "cond_stage_model.transformer");
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "clip_vision";
|
||||
}
|
||||
|
||||
|
|
@ -678,25 +678,25 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
t5 = std::make_shared<T5Runner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
clip_l->get_param_tensors(tensors, "text_encoders.clip_l.transformer.text_model");
|
||||
clip_g->get_param_tensors(tensors, "text_encoders.clip_g.transformer.text_model");
|
||||
t5->get_param_tensors(tensors, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
clip_l->alloc_params_buffer();
|
||||
clip_g->alloc_params_buffer();
|
||||
t5->alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
clip_l->free_params_buffer();
|
||||
clip_g->free_params_buffer();
|
||||
t5->free_params_buffer();
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
size_t buffer_size = clip_l->get_params_buffer_size();
|
||||
buffer_size += clip_g->get_params_buffer_size();
|
||||
buffer_size += t5->get_params_buffer_size();
|
||||
|
|
@ -747,7 +747,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
|
||||
clip_l_tokenizer.pad_tokens(clip_l_tokens, clip_l_weights, max_length, padding);
|
||||
clip_g_tokenizer.pad_tokens(clip_g_tokens, clip_g_weights, max_length, padding);
|
||||
t5_tokenizer.pad_tokens(t5_tokens, t5_weights, NULL, max_length, padding);
|
||||
t5_tokenizer.pad_tokens(t5_tokens, t5_weights, nullptr, max_length, padding);
|
||||
|
||||
// for (int i = 0; i < clip_l_tokens.size(); i++) {
|
||||
// std::cout << clip_l_tokens[i] << ":" << clip_l_weights[i] << ", ";
|
||||
|
|
@ -784,14 +784,14 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
}
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token*2, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token*2, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states_l = NULL; // [n_token, hidden_size_l]
|
||||
struct ggml_tensor* chunk_hidden_states_g = NULL; // [n_token, hidden_size_g]
|
||||
struct ggml_tensor* chunk_hidden_states_t5 = NULL; // [n_token, hidden_size_t5]
|
||||
struct ggml_tensor* pooled = NULL;
|
||||
struct ggml_tensor* pooled_l = NULL; // [768,]
|
||||
struct ggml_tensor* pooled_g = NULL; // [1280,]
|
||||
struct ggml_tensor* hidden_states = nullptr; // [N, n_token*2, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = nullptr; // [n_token*2, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states_l = nullptr; // [n_token, hidden_size_l]
|
||||
struct ggml_tensor* chunk_hidden_states_g = nullptr; // [n_token, hidden_size_g]
|
||||
struct ggml_tensor* chunk_hidden_states_t5 = nullptr; // [n_token, hidden_size_t5]
|
||||
struct ggml_tensor* pooled = nullptr;
|
||||
struct ggml_tensor* pooled_l = nullptr; // [768,]
|
||||
struct ggml_tensor* pooled_g = nullptr; // [1280,]
|
||||
std::vector<float> hidden_states_vec;
|
||||
|
||||
size_t chunk_len = 77;
|
||||
|
|
@ -810,7 +810,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
clip_l->compute(n_threads,
|
||||
input_ids,
|
||||
0,
|
||||
NULL,
|
||||
nullptr,
|
||||
max_token_idx,
|
||||
false,
|
||||
clip_skip,
|
||||
|
|
@ -838,7 +838,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
clip_l->compute(n_threads,
|
||||
input_ids,
|
||||
0,
|
||||
NULL,
|
||||
nullptr,
|
||||
max_token_idx,
|
||||
true,
|
||||
clip_skip,
|
||||
|
|
@ -860,7 +860,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
clip_g->compute(n_threads,
|
||||
input_ids,
|
||||
0,
|
||||
NULL,
|
||||
nullptr,
|
||||
max_token_idx,
|
||||
false,
|
||||
clip_skip,
|
||||
|
|
@ -889,7 +889,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
clip_g->compute(n_threads,
|
||||
input_ids,
|
||||
0,
|
||||
NULL,
|
||||
nullptr,
|
||||
max_token_idx,
|
||||
true,
|
||||
clip_skip,
|
||||
|
|
@ -909,7 +909,7 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
|
||||
t5->compute(n_threads,
|
||||
input_ids,
|
||||
NULL,
|
||||
nullptr,
|
||||
&chunk_hidden_states_t5,
|
||||
work_ctx);
|
||||
{
|
||||
|
|
@ -974,12 +974,12 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||
hidden_states,
|
||||
chunk_hidden_states->ne[0],
|
||||
ggml_nelements(hidden_states) / chunk_hidden_states->ne[0]);
|
||||
return SDCondition(hidden_states, pooled, NULL);
|
||||
return {hidden_states, pooled, nullptr};
|
||||
}
|
||||
|
||||
SDCondition get_learned_condition(ggml_context* work_ctx,
|
||||
int n_threads,
|
||||
const ConditionerParams& conditioner_params) {
|
||||
const ConditionerParams& conditioner_params) override {
|
||||
auto tokens_and_weights = tokenize(conditioner_params.text, 77, true);
|
||||
return get_learned_condition_common(work_ctx,
|
||||
n_threads,
|
||||
|
|
@ -1003,22 +1003,22 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
t5 = std::make_shared<T5Runner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
clip_l->get_param_tensors(tensors, "text_encoders.clip_l.transformer.text_model");
|
||||
t5->get_param_tensors(tensors, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
clip_l->alloc_params_buffer();
|
||||
t5->alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
clip_l->free_params_buffer();
|
||||
t5->free_params_buffer();
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
size_t buffer_size = clip_l->get_params_buffer_size();
|
||||
buffer_size += t5->get_params_buffer_size();
|
||||
return buffer_size;
|
||||
|
|
@ -1061,7 +1061,7 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
}
|
||||
|
||||
clip_l_tokenizer.pad_tokens(clip_l_tokens, clip_l_weights, 77, padding);
|
||||
t5_tokenizer.pad_tokens(t5_tokens, t5_weights, NULL, max_length, padding);
|
||||
t5_tokenizer.pad_tokens(t5_tokens, t5_weights, nullptr, max_length, padding);
|
||||
|
||||
// for (int i = 0; i < clip_l_tokens.size(); i++) {
|
||||
// std::cout << clip_l_tokens[i] << ":" << clip_l_weights[i] << ", ";
|
||||
|
|
@ -1091,9 +1091,9 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
}
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
|
||||
struct ggml_tensor* pooled = NULL; // [768,]
|
||||
struct ggml_tensor* hidden_states = nullptr; // [N, n_token, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = nullptr; // [n_token, 4096]
|
||||
struct ggml_tensor* pooled = nullptr; // [768,]
|
||||
std::vector<float> hidden_states_vec;
|
||||
|
||||
size_t chunk_count = t5_tokens.size() / chunk_len;
|
||||
|
|
@ -1115,7 +1115,7 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
clip_l->compute(n_threads,
|
||||
input_ids,
|
||||
0,
|
||||
NULL,
|
||||
nullptr,
|
||||
max_token_idx,
|
||||
true,
|
||||
clip_skip,
|
||||
|
|
@ -1134,7 +1134,7 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
|
||||
t5->compute(n_threads,
|
||||
input_ids,
|
||||
NULL,
|
||||
nullptr,
|
||||
&chunk_hidden_states,
|
||||
work_ctx);
|
||||
{
|
||||
|
|
@ -1173,12 +1173,12 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||
hidden_states,
|
||||
chunk_hidden_states->ne[0],
|
||||
ggml_nelements(hidden_states) / chunk_hidden_states->ne[0]);
|
||||
return SDCondition(hidden_states, pooled, NULL);
|
||||
return {hidden_states, pooled, nullptr};
|
||||
}
|
||||
|
||||
SDCondition get_learned_condition(ggml_context* work_ctx,
|
||||
int n_threads,
|
||||
const ConditionerParams& conditioner_params) {
|
||||
const ConditionerParams& conditioner_params) override {
|
||||
auto tokens_and_weights = tokenize(conditioner_params.text, chunk_len, true);
|
||||
return get_learned_condition_common(work_ctx,
|
||||
n_threads,
|
||||
|
|
@ -1206,19 +1206,19 @@ struct T5CLIPEmbedder : public Conditioner {
|
|||
t5 = std::make_shared<T5Runner>(backend, offload_params_to_cpu, tensor_types, "text_encoders.t5xxl.transformer", is_umt5);
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
t5->get_param_tensors(tensors, "text_encoders.t5xxl.transformer");
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
t5->alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
t5->free_params_buffer();
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
size_t buffer_size = 0;
|
||||
|
||||
buffer_size += t5->get_params_buffer_size();
|
||||
|
|
@ -1287,9 +1287,9 @@ struct T5CLIPEmbedder : public Conditioner {
|
|||
auto& t5_attn_mask_vec = std::get<2>(token_and_weights);
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = NULL; // [n_token, 4096]
|
||||
struct ggml_tensor* pooled = NULL;
|
||||
struct ggml_tensor* hidden_states = nullptr; // [N, n_token, 4096]
|
||||
struct ggml_tensor* chunk_hidden_states = nullptr; // [n_token, 4096]
|
||||
struct ggml_tensor* pooled = nullptr;
|
||||
struct ggml_tensor* t5_attn_mask = vector_to_ggml_tensor(work_ctx, t5_attn_mask_vec); // [n_token]
|
||||
|
||||
std::vector<float> hidden_states_vec;
|
||||
|
|
@ -1306,7 +1306,7 @@ struct T5CLIPEmbedder : public Conditioner {
|
|||
t5_attn_mask_vec.begin() + (chunk_idx + 1) * chunk_len);
|
||||
|
||||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, chunk_tokens);
|
||||
auto t5_attn_mask_chunk = use_mask ? vector_to_ggml_tensor(work_ctx, chunk_mask) : NULL;
|
||||
auto t5_attn_mask_chunk = use_mask ? vector_to_ggml_tensor(work_ctx, chunk_mask) : nullptr;
|
||||
|
||||
t5->compute(n_threads,
|
||||
input_ids,
|
||||
|
|
@ -1358,12 +1358,12 @@ struct T5CLIPEmbedder : public Conditioner {
|
|||
|
||||
modify_mask_to_attend_padding(t5_attn_mask, ggml_nelements(t5_attn_mask), mask_pad);
|
||||
|
||||
return SDCondition(hidden_states, t5_attn_mask, NULL);
|
||||
return {hidden_states, t5_attn_mask, nullptr};
|
||||
}
|
||||
|
||||
SDCondition get_learned_condition(ggml_context* work_ctx,
|
||||
int n_threads,
|
||||
const ConditionerParams& conditioner_params) {
|
||||
const ConditionerParams& conditioner_params) override {
|
||||
auto tokens_and_weights = tokenize(conditioner_params.text, chunk_len, true);
|
||||
return get_learned_condition_common(work_ctx,
|
||||
n_threads,
|
||||
|
|
@ -1389,19 +1389,19 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
|
|||
enable_vision);
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
qwenvl->get_param_tensors(tensors, "text_encoders.qwen2vl");
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
qwenvl->alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
qwenvl->free_params_buffer();
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
size_t buffer_size = 0;
|
||||
buffer_size += qwenvl->get_params_buffer_size();
|
||||
return buffer_size;
|
||||
|
|
@ -1454,7 +1454,7 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
|
|||
|
||||
SDCondition get_learned_condition(ggml_context* work_ctx,
|
||||
int n_threads,
|
||||
const ConditionerParams& conditioner_params) {
|
||||
const ConditionerParams& conditioner_params) override {
|
||||
std::string prompt;
|
||||
std::vector<std::pair<int, ggml_tensor*>> image_embeds;
|
||||
size_t system_prompt_length = 0;
|
||||
|
|
@ -1530,7 +1530,7 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
|
|||
auto& weights = std::get<1>(tokens_and_weights);
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
struct ggml_tensor* hidden_states = NULL; // [N, n_token, 3584]
|
||||
struct ggml_tensor* hidden_states = nullptr; // [N, n_token, 3584]
|
||||
|
||||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
|
||||
|
||||
|
|
@ -1570,7 +1570,7 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
|
|||
|
||||
int64_t t1 = ggml_time_ms();
|
||||
LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
|
||||
return SDCondition(new_hidden_states, nullptr, nullptr);
|
||||
return {new_hidden_states, nullptr, nullptr};
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -206,18 +206,18 @@ public:
|
|||
struct ggml_tensor* guided_hint,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* y = NULL) {
|
||||
struct ggml_tensor* y = nullptr) {
|
||||
// x: [N, in_channels, h, w] or [N, in_channels/2, h, w]
|
||||
// timesteps: [N,]
|
||||
// context: [N, max_position, hidden_size] or [1, max_position, hidden_size]. for example, [N, 77, 768]
|
||||
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
||||
if (context != NULL) {
|
||||
if (context != nullptr) {
|
||||
if (context->ne[2] != x->ne[3]) {
|
||||
context = ggml_repeat(ctx, context, ggml_new_tensor_3d(ctx, GGML_TYPE_F32, context->ne[0], context->ne[1], x->ne[3]));
|
||||
}
|
||||
}
|
||||
|
||||
if (y != NULL) {
|
||||
if (y != nullptr) {
|
||||
if (y->ne[1] != x->ne[3]) {
|
||||
y = ggml_repeat(ctx, y, ggml_new_tensor_2d(ctx, GGML_TYPE_F32, y->ne[0], x->ne[3]));
|
||||
}
|
||||
|
|
@ -237,7 +237,7 @@ public:
|
|||
emb = time_embed_2->forward(ctx, emb); // [N, time_embed_dim]
|
||||
|
||||
// SDXL/SVD
|
||||
if (y != NULL) {
|
||||
if (y != nullptr) {
|
||||
auto label_embed_0 = std::dynamic_pointer_cast<Linear>(blocks["label_emb.0.0"]);
|
||||
auto label_embed_2 = std::dynamic_pointer_cast<Linear>(blocks["label_emb.0.2"]);
|
||||
|
||||
|
|
@ -250,7 +250,7 @@ public:
|
|||
|
||||
std::vector<struct ggml_tensor*> outs;
|
||||
|
||||
if (guided_hint == NULL) {
|
||||
if (guided_hint == nullptr) {
|
||||
guided_hint = input_hint_block_forward(ctx, hint, emb, context);
|
||||
}
|
||||
outs.push_back(guided_hint);
|
||||
|
|
@ -312,10 +312,10 @@ struct ControlNet : public GGMLRunner {
|
|||
SDVersion version = VERSION_SD1;
|
||||
ControlNetBlock control_net;
|
||||
|
||||
ggml_backend_buffer_t control_buffer = NULL; // keep control output tensors in backend memory
|
||||
ggml_context* control_ctx = NULL;
|
||||
ggml_backend_buffer_t control_buffer = nullptr; // keep control output tensors in backend memory
|
||||
ggml_context* control_ctx = nullptr;
|
||||
std::vector<struct ggml_tensor*> controls; // (12 input block outputs, 1 middle block output) SD 1.5
|
||||
struct ggml_tensor* guided_hint = NULL; // guided_hint cache, for faster inference
|
||||
struct ggml_tensor* guided_hint = nullptr; // guided_hint cache, for faster inference
|
||||
bool guided_hint_cached = false;
|
||||
|
||||
ControlNet(ggml_backend_t backend,
|
||||
|
|
@ -337,14 +337,14 @@ struct ControlNet : public GGMLRunner {
|
|||
}
|
||||
}
|
||||
|
||||
~ControlNet() {
|
||||
~ControlNet() override {
|
||||
free_control_ctx();
|
||||
}
|
||||
|
||||
void alloc_control_ctx(std::vector<struct ggml_tensor*> outs) {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(outs.size() * ggml_tensor_overhead()) + 1024 * 1024;
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = true;
|
||||
control_ctx = ggml_init(params);
|
||||
|
||||
|
|
@ -366,20 +366,20 @@ struct ControlNet : public GGMLRunner {
|
|||
}
|
||||
|
||||
void free_control_ctx() {
|
||||
if (control_buffer != NULL) {
|
||||
if (control_buffer != nullptr) {
|
||||
ggml_backend_buffer_free(control_buffer);
|
||||
control_buffer = NULL;
|
||||
control_buffer = nullptr;
|
||||
}
|
||||
if (control_ctx != NULL) {
|
||||
if (control_ctx != nullptr) {
|
||||
ggml_free(control_ctx);
|
||||
control_ctx = NULL;
|
||||
control_ctx = nullptr;
|
||||
}
|
||||
guided_hint = NULL;
|
||||
guided_hint = nullptr;
|
||||
guided_hint_cached = false;
|
||||
controls.clear();
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "control_net";
|
||||
}
|
||||
|
||||
|
|
@ -391,12 +391,12 @@ struct ControlNet : public GGMLRunner {
|
|||
struct ggml_tensor* hint,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* y = NULL) {
|
||||
struct ggml_tensor* y = nullptr) {
|
||||
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, CONTROL_NET_GRAPH_SIZE, false);
|
||||
|
||||
x = to_backend(x);
|
||||
if (guided_hint_cached) {
|
||||
hint = NULL;
|
||||
hint = nullptr;
|
||||
} else {
|
||||
hint = to_backend(hint);
|
||||
}
|
||||
|
|
@ -408,12 +408,12 @@ struct ControlNet : public GGMLRunner {
|
|||
runtime_backend,
|
||||
x,
|
||||
hint,
|
||||
guided_hint_cached ? guided_hint : NULL,
|
||||
guided_hint_cached ? guided_hint : nullptr,
|
||||
timesteps,
|
||||
context,
|
||||
y);
|
||||
|
||||
if (control_ctx == NULL) {
|
||||
if (control_ctx == nullptr) {
|
||||
alloc_control_ctx(outs);
|
||||
}
|
||||
|
||||
|
|
@ -431,8 +431,8 @@ struct ControlNet : public GGMLRunner {
|
|||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) {
|
||||
// x: [N, in_channels, h, w]
|
||||
// timesteps: [N, ]
|
||||
// context: [N, max_position, hidden_size]([N, 77, 768]) or [1, max_position, hidden_size]
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ struct SigmaSchedule {
|
|||
};
|
||||
|
||||
struct DiscreteSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
|
||||
std::vector<float> result;
|
||||
|
||||
int t_max = TIMESTEPS - 1;
|
||||
|
|
@ -43,7 +43,7 @@ struct DiscreteSchedule : SigmaSchedule {
|
|||
};
|
||||
|
||||
struct ExponentialSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
|
||||
std::vector<float> sigmas;
|
||||
|
||||
// Calculate step size
|
||||
|
|
@ -150,7 +150,7 @@ std::vector<float> log_linear_interpolation(std::vector<float> sigma_in,
|
|||
https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
|
||||
*/
|
||||
struct AYSSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
|
||||
const std::vector<float> noise_levels[] = {
|
||||
/* SD1.5 */
|
||||
{14.6146412293f, 6.4745760956f, 3.8636745985f, 2.6946151520f,
|
||||
|
|
@ -204,7 +204,7 @@ struct AYSSchedule : SigmaSchedule {
|
|||
* GITS Scheduler: https://github.com/zju-pi/diff-sampler/tree/main/gits-main
|
||||
*/
|
||||
struct GITSSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
|
||||
if (sigma_max <= 0.0f) {
|
||||
return std::vector<float>{};
|
||||
}
|
||||
|
|
@ -252,7 +252,7 @@ struct SGMUniformSchedule : SigmaSchedule {
|
|||
};
|
||||
|
||||
struct KarrasSchedule : SigmaSchedule {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
|
||||
// These *COULD* be function arguments here,
|
||||
// but does anybody ever bother to touch them?
|
||||
float rho = 7.f;
|
||||
|
|
@ -350,15 +350,15 @@ struct CompVisDenoiser : public Denoiser {
|
|||
|
||||
float sigma_data = 1.0f;
|
||||
|
||||
float sigma_min() {
|
||||
float sigma_min() override {
|
||||
return sigmas[0];
|
||||
}
|
||||
|
||||
float sigma_max() {
|
||||
float sigma_max() override {
|
||||
return sigmas[TIMESTEPS - 1];
|
||||
}
|
||||
|
||||
float sigma_to_t(float sigma) {
|
||||
float sigma_to_t(float sigma) override {
|
||||
float log_sigma = std::log(sigma);
|
||||
std::vector<float> dists;
|
||||
dists.reserve(TIMESTEPS);
|
||||
|
|
@ -384,7 +384,7 @@ struct CompVisDenoiser : public Denoiser {
|
|||
return t;
|
||||
}
|
||||
|
||||
float t_to_sigma(float t) {
|
||||
float t_to_sigma(float t) override {
|
||||
int low_idx = static_cast<int>(std::floor(t));
|
||||
int high_idx = static_cast<int>(std::ceil(t));
|
||||
float w = t - static_cast<float>(low_idx);
|
||||
|
|
@ -392,7 +392,7 @@ struct CompVisDenoiser : public Denoiser {
|
|||
return std::exp(log_sigma);
|
||||
}
|
||||
|
||||
std::vector<float> get_scalings(float sigma) {
|
||||
std::vector<float> get_scalings(float sigma) override {
|
||||
float c_skip = 1.0f;
|
||||
float c_out = -sigma;
|
||||
float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data);
|
||||
|
|
@ -400,19 +400,19 @@ struct CompVisDenoiser : public Denoiser {
|
|||
}
|
||||
|
||||
// this function will modify noise/latent
|
||||
ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) {
|
||||
ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) override {
|
||||
ggml_tensor_scale(noise, sigma);
|
||||
ggml_tensor_add(latent, noise);
|
||||
return latent;
|
||||
}
|
||||
|
||||
ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) {
|
||||
ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) override {
|
||||
return latent;
|
||||
}
|
||||
};
|
||||
|
||||
struct CompVisVDenoiser : public CompVisDenoiser {
|
||||
std::vector<float> get_scalings(float sigma) {
|
||||
std::vector<float> get_scalings(float sigma) override {
|
||||
float c_skip = sigma_data * sigma_data / (sigma * sigma + sigma_data * sigma_data);
|
||||
float c_out = -sigma * sigma_data / std::sqrt(sigma * sigma + sigma_data * sigma_data);
|
||||
float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data);
|
||||
|
|
@ -429,19 +429,19 @@ struct EDMVDenoiser : public CompVisVDenoiser {
|
|||
scheduler = std::make_shared<ExponentialSchedule>();
|
||||
}
|
||||
|
||||
float t_to_sigma(float t) {
|
||||
float t_to_sigma(float t) override {
|
||||
return std::exp(t * 4 / (float)TIMESTEPS);
|
||||
}
|
||||
|
||||
float sigma_to_t(float s) {
|
||||
float sigma_to_t(float s) override {
|
||||
return 0.25 * std::log(s);
|
||||
}
|
||||
|
||||
float sigma_min() {
|
||||
float sigma_min() override {
|
||||
return min_sigma;
|
||||
}
|
||||
|
||||
float sigma_max() {
|
||||
float sigma_max() override {
|
||||
return max_sigma;
|
||||
}
|
||||
};
|
||||
|
|
@ -470,24 +470,24 @@ struct DiscreteFlowDenoiser : public Denoiser {
|
|||
}
|
||||
}
|
||||
|
||||
float sigma_min() {
|
||||
float sigma_min() override {
|
||||
return sigmas[0];
|
||||
}
|
||||
|
||||
float sigma_max() {
|
||||
float sigma_max() override {
|
||||
return sigmas[TIMESTEPS - 1];
|
||||
}
|
||||
|
||||
float sigma_to_t(float sigma) {
|
||||
float sigma_to_t(float sigma) override {
|
||||
return sigma * 1000.f;
|
||||
}
|
||||
|
||||
float t_to_sigma(float t) {
|
||||
float t_to_sigma(float t) override {
|
||||
t = t + 1;
|
||||
return time_snr_shift(shift, t / 1000.f);
|
||||
}
|
||||
|
||||
std::vector<float> get_scalings(float sigma) {
|
||||
std::vector<float> get_scalings(float sigma) override {
|
||||
float c_skip = 1.0f;
|
||||
float c_out = -sigma;
|
||||
float c_in = 1.0f;
|
||||
|
|
@ -495,14 +495,14 @@ struct DiscreteFlowDenoiser : public Denoiser {
|
|||
}
|
||||
|
||||
// this function will modify noise/latent
|
||||
ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) {
|
||||
ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) override {
|
||||
ggml_tensor_scale(noise, sigma);
|
||||
ggml_tensor_scale(latent, 1.0f - sigma);
|
||||
ggml_tensor_add(latent, noise);
|
||||
return latent;
|
||||
}
|
||||
|
||||
ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) {
|
||||
ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) override {
|
||||
ggml_tensor_scale(latent, 1.0f / (1.0f - sigma));
|
||||
return latent;
|
||||
}
|
||||
|
|
@ -529,24 +529,24 @@ struct FluxFlowDenoiser : public Denoiser {
|
|||
}
|
||||
}
|
||||
|
||||
float sigma_min() {
|
||||
float sigma_min() override {
|
||||
return sigmas[0];
|
||||
}
|
||||
|
||||
float sigma_max() {
|
||||
float sigma_max() override {
|
||||
return sigmas[TIMESTEPS - 1];
|
||||
}
|
||||
|
||||
float sigma_to_t(float sigma) {
|
||||
float sigma_to_t(float sigma) override {
|
||||
return sigma;
|
||||
}
|
||||
|
||||
float t_to_sigma(float t) {
|
||||
float t_to_sigma(float t) override {
|
||||
t = t + 1;
|
||||
return flux_time_shift(shift, 1.0f, t / TIMESTEPS);
|
||||
}
|
||||
|
||||
std::vector<float> get_scalings(float sigma) {
|
||||
std::vector<float> get_scalings(float sigma) override {
|
||||
float c_skip = 1.0f;
|
||||
float c_out = -sigma;
|
||||
float c_in = 1.0f;
|
||||
|
|
@ -554,14 +554,14 @@ struct FluxFlowDenoiser : public Denoiser {
|
|||
}
|
||||
|
||||
// this function will modify noise/latent
|
||||
ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) {
|
||||
ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) override {
|
||||
ggml_tensor_scale(noise, sigma);
|
||||
ggml_tensor_scale(latent, 1.0f - sigma);
|
||||
ggml_tensor_add(latent, noise);
|
||||
return latent;
|
||||
}
|
||||
|
||||
ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) {
|
||||
ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) override {
|
||||
ggml_tensor_scale(latent, 1.0f / (1.0f - sigma));
|
||||
return latent;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,18 +8,18 @@
|
|||
#include "wan.hpp"
|
||||
|
||||
struct DiffusionParams {
|
||||
struct ggml_tensor* x = NULL;
|
||||
struct ggml_tensor* timesteps = NULL;
|
||||
struct ggml_tensor* context = NULL;
|
||||
struct ggml_tensor* c_concat = NULL;
|
||||
struct ggml_tensor* y = NULL;
|
||||
struct ggml_tensor* guidance = NULL;
|
||||
struct ggml_tensor* x = nullptr;
|
||||
struct ggml_tensor* timesteps = nullptr;
|
||||
struct ggml_tensor* context = nullptr;
|
||||
struct ggml_tensor* c_concat = nullptr;
|
||||
struct ggml_tensor* y = nullptr;
|
||||
struct ggml_tensor* guidance = nullptr;
|
||||
std::vector<ggml_tensor*> ref_latents = {};
|
||||
bool increase_ref_index = false;
|
||||
int num_video_frames = -1;
|
||||
std::vector<struct ggml_tensor*> controls = {};
|
||||
float control_strength = 0.f;
|
||||
struct ggml_tensor* vace_context = NULL;
|
||||
struct ggml_tensor* vace_context = nullptr;
|
||||
float vace_strength = 1.f;
|
||||
std::vector<int> skip_layers = {};
|
||||
};
|
||||
|
|
@ -28,8 +28,8 @@ struct DiffusionModel {
|
|||
virtual std::string get_desc() = 0;
|
||||
virtual void compute(int n_threads,
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) = 0;
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) = 0;
|
||||
virtual void alloc_params_buffer() = 0;
|
||||
virtual void free_params_buffer() = 0;
|
||||
virtual void free_compute_buffer() = 0;
|
||||
|
|
@ -49,38 +49,38 @@ struct UNetModel : public DiffusionModel {
|
|||
: unet(backend, offload_params_to_cpu, tensor_types, "model.diffusion_model", version, flash_attn) {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return unet.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
unet.alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
unet.free_params_buffer();
|
||||
}
|
||||
|
||||
void free_compute_buffer() {
|
||||
void free_compute_buffer() override {
|
||||
unet.free_compute_buffer();
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
unet.get_param_tensors(tensors, "model.diffusion_model");
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
return unet.get_params_buffer_size();
|
||||
}
|
||||
|
||||
int64_t get_adm_in_channels() {
|
||||
int64_t get_adm_in_channels() override {
|
||||
return unet.unet.adm_in_channels;
|
||||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) override {
|
||||
return unet.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
|
|
@ -103,38 +103,38 @@ struct MMDiTModel : public DiffusionModel {
|
|||
: mmdit(backend, offload_params_to_cpu, flash_attn, tensor_types, "model.diffusion_model") {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return mmdit.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
mmdit.alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
mmdit.free_params_buffer();
|
||||
}
|
||||
|
||||
void free_compute_buffer() {
|
||||
void free_compute_buffer() override {
|
||||
mmdit.free_compute_buffer();
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
mmdit.get_param_tensors(tensors, "model.diffusion_model");
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
return mmdit.get_params_buffer_size();
|
||||
}
|
||||
|
||||
int64_t get_adm_in_channels() {
|
||||
int64_t get_adm_in_channels() override {
|
||||
return 768 + 1280;
|
||||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) override {
|
||||
return mmdit.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
|
|
@ -158,38 +158,38 @@ struct FluxModel : public DiffusionModel {
|
|||
: flux(backend, offload_params_to_cpu, tensor_types, "model.diffusion_model", version, flash_attn, use_mask) {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return flux.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
flux.alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
flux.free_params_buffer();
|
||||
}
|
||||
|
||||
void free_compute_buffer() {
|
||||
void free_compute_buffer() override {
|
||||
flux.free_compute_buffer();
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
flux.get_param_tensors(tensors, "model.diffusion_model");
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
return flux.get_params_buffer_size();
|
||||
}
|
||||
|
||||
int64_t get_adm_in_channels() {
|
||||
int64_t get_adm_in_channels() override {
|
||||
return 768;
|
||||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) override {
|
||||
return flux.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
|
|
@ -218,45 +218,45 @@ struct WanModel : public DiffusionModel {
|
|||
: prefix(prefix), wan(backend, offload_params_to_cpu, tensor_types, prefix, version, flash_attn) {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return wan.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
wan.alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
wan.free_params_buffer();
|
||||
}
|
||||
|
||||
void free_compute_buffer() {
|
||||
void free_compute_buffer() override {
|
||||
wan.free_compute_buffer();
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
wan.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
return wan.get_params_buffer_size();
|
||||
}
|
||||
|
||||
int64_t get_adm_in_channels() {
|
||||
int64_t get_adm_in_channels() override {
|
||||
return 768;
|
||||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) override {
|
||||
return wan.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
diffusion_params.context,
|
||||
diffusion_params.y,
|
||||
diffusion_params.c_concat,
|
||||
NULL,
|
||||
nullptr,
|
||||
diffusion_params.vace_context,
|
||||
diffusion_params.vace_strength,
|
||||
output,
|
||||
|
|
@ -277,38 +277,38 @@ struct QwenImageModel : public DiffusionModel {
|
|||
: prefix(prefix), qwen_image(backend, offload_params_to_cpu, tensor_types, prefix, version, flash_attn) {
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return qwen_image.get_desc();
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
void alloc_params_buffer() override {
|
||||
qwen_image.alloc_params_buffer();
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
void free_params_buffer() override {
|
||||
qwen_image.free_params_buffer();
|
||||
}
|
||||
|
||||
void free_compute_buffer() {
|
||||
void free_compute_buffer() override {
|
||||
qwen_image.free_compute_buffer();
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
||||
qwen_image.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_buffer_size() override {
|
||||
return qwen_image.get_params_buffer_size();
|
||||
}
|
||||
|
||||
int64_t get_adm_in_channels() {
|
||||
int64_t get_adm_in_channels() override {
|
||||
return 768;
|
||||
}
|
||||
|
||||
void compute(int n_threads,
|
||||
DiffusionParams diffusion_params,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) override {
|
||||
return qwen_image.compute(n_threads,
|
||||
diffusion_params.x,
|
||||
diffusion_params.timesteps,
|
||||
|
|
|
|||
|
|
@ -174,7 +174,7 @@ struct ESRGAN : public GGMLRunner {
|
|||
}
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "esrgan";
|
||||
}
|
||||
|
||||
|
|
@ -367,7 +367,7 @@ struct ESRGAN : public GGMLRunner {
|
|||
void compute(const int n_threads,
|
||||
struct ggml_tensor* x,
|
||||
ggml_tensor** output,
|
||||
ggml_context* output_ctx = NULL) {
|
||||
ggml_context* output_ctx = nullptr) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(x);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef __FLUX_HPP__
|
||||
#define __FLUX_HPP__
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "ggml_extend.hpp"
|
||||
|
|
@ -18,7 +19,7 @@ namespace Flux {
|
|||
blocks["out_layer"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, hidden_dim, true));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [..., in_dim]
|
||||
// return: [..., hidden_dim]
|
||||
auto in_layer = std::dynamic_pointer_cast<Linear>(blocks["in_layer"]);
|
||||
|
|
@ -36,7 +37,7 @@ namespace Flux {
|
|||
int64_t hidden_size;
|
||||
float eps;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
ggml_type wtype = GGML_TYPE_F32;
|
||||
params["scale"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
|
||||
}
|
||||
|
|
@ -47,7 +48,7 @@ namespace Flux {
|
|||
: hidden_size(hidden_size),
|
||||
eps(eps) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
struct ggml_tensor* w = params["scale"];
|
||||
x = ggml_rms_norm(ctx, x, eps);
|
||||
x = ggml_mul(ctx, x, w);
|
||||
|
|
@ -136,11 +137,11 @@ namespace Flux {
|
|||
};
|
||||
|
||||
struct ModulationOut {
|
||||
ggml_tensor* shift = NULL;
|
||||
ggml_tensor* scale = NULL;
|
||||
ggml_tensor* gate = NULL;
|
||||
ggml_tensor* shift = nullptr;
|
||||
ggml_tensor* scale = nullptr;
|
||||
ggml_tensor* gate = nullptr;
|
||||
|
||||
ModulationOut(ggml_tensor* shift = NULL, ggml_tensor* scale = NULL, ggml_tensor* gate = NULL)
|
||||
ModulationOut(ggml_tensor* shift = nullptr, ggml_tensor* scale = nullptr, ggml_tensor* gate = nullptr)
|
||||
: shift(shift), scale(scale), gate(gate) {}
|
||||
|
||||
ModulationOut(struct ggml_context* ctx, ggml_tensor* vec, int64_t offset) {
|
||||
|
|
@ -259,7 +260,7 @@ namespace Flux {
|
|||
struct ggml_tensor* txt,
|
||||
struct ggml_tensor* vec,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* mask = NULL) {
|
||||
struct ggml_tensor* mask = nullptr) {
|
||||
// img: [N, n_img_token, hidden_size]
|
||||
// txt: [N, n_txt_token, hidden_size]
|
||||
// pe: [n_img_token + n_txt_token, d_head/2, 2, 2]
|
||||
|
|
@ -398,7 +399,7 @@ namespace Flux {
|
|||
|
||||
ModulationOut get_distil_mod(struct ggml_context* ctx, struct ggml_tensor* vec) {
|
||||
int64_t offset = 3 * idx;
|
||||
return ModulationOut(ctx, vec, offset);
|
||||
return {ctx, vec, offset};
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
|
|
@ -406,7 +407,7 @@ namespace Flux {
|
|||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* vec,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* mask = NULL) {
|
||||
struct ggml_tensor* mask = nullptr) {
|
||||
// x: [N, n_token, hidden_size]
|
||||
// pe: [n_token, d_head/2, 2, 2]
|
||||
// return: [N, n_token, hidden_size]
|
||||
|
|
@ -485,7 +486,7 @@ namespace Flux {
|
|||
auto shift = ggml_view_2d(ctx, vec, vec->ne[0], vec->ne[1], vec->nb[1], stride * (offset + 0)); // [N, dim]
|
||||
auto scale = ggml_view_2d(ctx, vec, vec->ne[0], vec->ne[1], vec->nb[1], stride * (offset + 1)); // [N, dim]
|
||||
// No gate
|
||||
return ModulationOut(shift, scale, NULL);
|
||||
return {shift, scale, nullptr};
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
|
|
@ -664,7 +665,7 @@ namespace Flux {
|
|||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* mod_index_arange = NULL,
|
||||
struct ggml_tensor* mod_index_arange = nullptr,
|
||||
std::vector<int> skip_layers = {}) {
|
||||
auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
|
||||
auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
|
||||
|
|
@ -672,7 +673,7 @@ namespace Flux {
|
|||
|
||||
img = img_in->forward(ctx, img);
|
||||
struct ggml_tensor* vec;
|
||||
struct ggml_tensor* txt_img_mask = NULL;
|
||||
struct ggml_tensor* txt_img_mask = nullptr;
|
||||
if (params.is_chroma) {
|
||||
int64_t mod_index_length = 344;
|
||||
auto approx = std::dynamic_pointer_cast<ChromaApproximator>(blocks["distilled_guidance_layer"]);
|
||||
|
|
@ -681,7 +682,7 @@ namespace Flux {
|
|||
|
||||
// auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
|
||||
// ggml_arange is not working on a lot of backends, precomputing it on CPU instead
|
||||
GGML_ASSERT(arange != NULL);
|
||||
GGML_ASSERT(arange != nullptr);
|
||||
auto modulation_index = ggml_nn_timestep_embedding(ctx, mod_index_arange, 32, 10000, 1000.f); // [1, 344, 32]
|
||||
|
||||
// Batch broadcast (will it ever be useful)
|
||||
|
|
@ -695,7 +696,7 @@ namespace Flux {
|
|||
vec = ggml_cont(ctx, ggml_permute(ctx, vec, 0, 2, 1, 3)); // [344, N, 64]
|
||||
vec = approx->forward(ctx, vec); // [344, N, hidden_size]
|
||||
|
||||
if (y != NULL) {
|
||||
if (y != nullptr) {
|
||||
txt_img_mask = ggml_pad(ctx, y, img->ne[1], 0, 0, 0);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -703,7 +704,7 @@ namespace Flux {
|
|||
auto vector_in = std::dynamic_pointer_cast<MLPEmbedder>(blocks["vector_in"]);
|
||||
vec = time_in->forward(ctx, ggml_nn_timestep_embedding(ctx, timesteps, 256, 10000, 1000.f));
|
||||
if (params.guidance_embed) {
|
||||
GGML_ASSERT(guidance != NULL);
|
||||
GGML_ASSERT(guidance != nullptr);
|
||||
auto guidance_in = std::dynamic_pointer_cast<MLPEmbedder>(blocks["guidance_in"]);
|
||||
// bf16 and fp16 result is different
|
||||
auto g_in = ggml_nn_timestep_embedding(ctx, guidance, 256, 10000, 1000.f);
|
||||
|
|
@ -775,14 +776,14 @@ namespace Flux {
|
|||
struct ggml_tensor* y,
|
||||
struct ggml_tensor* guidance,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* mod_index_arange = NULL,
|
||||
struct ggml_tensor* mod_index_arange = nullptr,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
std::vector<int> skip_layers = {}) {
|
||||
// Forward pass of DiT.
|
||||
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
|
||||
// timestep: (N,) tensor of diffusion timesteps
|
||||
// context: (N, L, D)
|
||||
// c_concat: NULL, or for (N,C+M, H, W) for Fill
|
||||
// c_concat: nullptr, or (N, C+M, H, W) for Fill
|
||||
// y: (N, adm_in_channels) tensor of class labels
|
||||
// guidance: (N,)
|
||||
// pe: (L, d_head/2, 2, 2)
|
||||
|
|
@ -801,7 +802,7 @@ namespace Flux {
|
|||
uint64_t img_tokens = img->ne[1];
|
||||
|
||||
if (params.version == VERSION_FLUX_FILL) {
|
||||
GGML_ASSERT(c_concat != NULL);
|
||||
GGML_ASSERT(c_concat != nullptr);
|
||||
ggml_tensor* masked = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], C, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], 0);
|
||||
ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
|
||||
|
||||
|
|
@ -810,7 +811,7 @@ namespace Flux {
|
|||
|
||||
img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0);
|
||||
} else if (params.version == VERSION_FLEX_2) {
|
||||
GGML_ASSERT(c_concat != NULL);
|
||||
GGML_ASSERT(c_concat != nullptr);
|
||||
ggml_tensor* masked = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], C, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], 0);
|
||||
ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 1, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
|
||||
ggml_tensor* control = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], C, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * (C + 1));
|
||||
|
|
@ -825,7 +826,7 @@ namespace Flux {
|
|||
|
||||
img = ggml_concat(ctx, img, ggml_concat(ctx, ggml_concat(ctx, masked, mask, 0), control, 0), 0);
|
||||
} else if (params.version == VERSION_FLUX_CONTROLS) {
|
||||
GGML_ASSERT(c_concat != NULL);
|
||||
GGML_ASSERT(c_concat != nullptr);
|
||||
|
||||
ggml_tensor* control = ggml_pad(ctx, c_concat, pad_w, pad_h, 0, 0);
|
||||
control = patchify(ctx, control, patch_size);
|
||||
|
|
@ -924,7 +925,7 @@ namespace Flux {
|
|||
flux.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "flux";
|
||||
}
|
||||
|
||||
|
|
@ -944,18 +945,18 @@ namespace Flux {
|
|||
GGML_ASSERT(x->ne[3] == 1);
|
||||
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
|
||||
|
||||
struct ggml_tensor* mod_index_arange = NULL;
|
||||
struct ggml_tensor* mod_index_arange = nullptr;
|
||||
|
||||
x = to_backend(x);
|
||||
context = to_backend(context);
|
||||
if (c_concat != NULL) {
|
||||
if (c_concat != nullptr) {
|
||||
c_concat = to_backend(c_concat);
|
||||
}
|
||||
if (flux_params.is_chroma) {
|
||||
guidance = ggml_set_f32(guidance, 0);
|
||||
|
||||
if (!use_mask) {
|
||||
y = NULL;
|
||||
y = nullptr;
|
||||
}
|
||||
|
||||
// ggml_arange is not working on some backends, precompute it
|
||||
|
|
@ -987,7 +988,7 @@ namespace Flux {
|
|||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, flux_params.axes_dim_sum / 2, pos_len);
|
||||
// pe->data = pe_vec.data();
|
||||
// print_ggml_tensor(pe);
|
||||
// pe->data = NULL;
|
||||
// pe->data = nullptr;
|
||||
set_backend_tensor_data(pe, pe_vec.data());
|
||||
|
||||
struct ggml_tensor* out = flux.forward(compute_ctx,
|
||||
|
|
@ -1017,8 +1018,8 @@ namespace Flux {
|
|||
struct ggml_tensor* guidance,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
bool increase_ref_index = false,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
// x: [N, in_channels, h, w]
|
||||
// timesteps: [N, ]
|
||||
|
|
@ -1035,11 +1036,11 @@ namespace Flux {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(20 * 1024 * 1024); // 20 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
{
|
||||
// cpu f16:
|
||||
|
|
@ -1063,10 +1064,10 @@ namespace Flux {
|
|||
ggml_set_f32(y, 0.01f);
|
||||
// print_ggml_tensor(y);
|
||||
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, timesteps, context, NULL, y, guidance, {}, false, &out, work_ctx);
|
||||
compute(8, x, timesteps, context, nullptr, y, guidance, {}, false, &out, work_ctx);
|
||||
int t1 = ggml_time_ms();
|
||||
|
||||
print_ggml_tensor(out);
|
||||
|
|
@ -1078,7 +1079,7 @@ namespace Flux {
|
|||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||
std::shared_ptr<FluxRunner> flux = std::shared_ptr<FluxRunner>(new FluxRunner(backend, false));
|
||||
std::shared_ptr<FluxRunner> flux = std::make_shared<FluxRunner>(backend, false);
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_mul_n_mode(struct ggml_context* ctx,
|
|||
return result;
|
||||
}
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* ggml_merge_lora(ggml_context* ctx, struct ggml_tensor* lora_down, struct ggml_tensor* lora_up, struct ggml_tensor* lora_mid = NULL) {
|
||||
__STATIC_INLINE__ struct ggml_tensor* ggml_merge_lora(ggml_context* ctx, struct ggml_tensor* lora_down, struct ggml_tensor* lora_up, struct ggml_tensor* lora_mid = nullptr) {
|
||||
struct ggml_tensor* updown;
|
||||
// flat lora tensors to multiply it
|
||||
int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
|
||||
|
|
@ -119,7 +119,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_merge_lora(ggml_context* ctx, struct
|
|||
|
||||
// ggml_mul_mat requires tensor b transposed
|
||||
lora_down = ggml_cont(ctx, ggml_transpose(ctx, lora_down));
|
||||
if (lora_mid == NULL) {
|
||||
if (lora_mid == nullptr) {
|
||||
updown = ggml_mul_mat(ctx, lora_up, lora_down);
|
||||
updown = ggml_cont(ctx, ggml_transpose(ctx, updown));
|
||||
} else {
|
||||
|
|
@ -166,7 +166,7 @@ __STATIC_INLINE__ void ggml_tensor_set_f32(struct ggml_tensor* tensor, float val
|
|||
}
|
||||
|
||||
__STATIC_INLINE__ float ggml_tensor_get_f32(const ggml_tensor* tensor, int l, int k = 0, int j = 0, int i = 0) {
|
||||
if (tensor->buffer != NULL) {
|
||||
if (tensor->buffer != nullptr) {
|
||||
float value;
|
||||
ggml_backend_tensor_get(tensor, &value, i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0], sizeof(float));
|
||||
return value;
|
||||
|
|
@ -176,7 +176,7 @@ __STATIC_INLINE__ float ggml_tensor_get_f32(const ggml_tensor* tensor, int l, in
|
|||
}
|
||||
|
||||
__STATIC_INLINE__ int ggml_tensor_get_i32(const ggml_tensor* tensor, int l, int k = 0, int j = 0, int i = 0) {
|
||||
if (tensor->buffer != NULL) {
|
||||
if (tensor->buffer != nullptr) {
|
||||
float value;
|
||||
ggml_backend_tensor_get(tensor, &value, i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0], sizeof(int));
|
||||
return value;
|
||||
|
|
@ -298,7 +298,7 @@ __STATIC_INLINE__ ggml_tensor* load_tensor_from_file(ggml_context* ctx, const st
|
|||
std::ifstream file(fpath, std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
int32_t n_dims;
|
||||
int32_t length;
|
||||
|
|
@ -312,7 +312,7 @@ __STATIC_INLINE__ ggml_tensor* load_tensor_from_file(ggml_context* ctx, const st
|
|||
|
||||
if (file.eof()) {
|
||||
LOG_ERROR("incomplete file '%s'", file_path.c_str());
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int32_t nelements = 1;
|
||||
|
|
@ -360,7 +360,7 @@ __STATIC_INLINE__ void copy_ggml_tensor(struct ggml_tensor* dst, struct ggml_ten
|
|||
}
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = 10 * 1024 * 1024; // for padding
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* ctx = ggml_init(params);
|
||||
if (!ctx) {
|
||||
|
|
@ -866,7 +866,7 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
|
|||
params.mem_size += input_tile_size_x * input_tile_size_y * input->ne[2] * input->ne[3] * sizeof(float); // input chunk
|
||||
params.mem_size += output_tile_size_x * output_tile_size_y * output->ne[2] * output->ne[3] * sizeof(float); // output chunk
|
||||
params.mem_size += 3 * ggml_tensor_overhead();
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
LOG_DEBUG("tile work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
||||
|
|
@ -967,7 +967,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx,
|
|||
if (scale != 1.f) {
|
||||
x = ggml_scale(ctx, x, 1.f / scale);
|
||||
}
|
||||
if (b != NULL) {
|
||||
if (b != nullptr) {
|
||||
x = ggml_add_inplace(ctx, x, b);
|
||||
}
|
||||
return x;
|
||||
|
|
@ -1000,7 +1000,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx,
|
|||
if (scale != 1.f) {
|
||||
x = ggml_scale(ctx, x, 1.f / scale);
|
||||
}
|
||||
if (b != NULL) {
|
||||
if (b != nullptr) {
|
||||
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
|
||||
x = ggml_add_inplace(ctx, x, b);
|
||||
}
|
||||
|
|
@ -1029,7 +1029,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_3d(struct ggml_context* ctx,
|
|||
int64_t N = x->ne[3] / IC;
|
||||
x = ggml_conv_3d(ctx, w, x, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2);
|
||||
|
||||
if (b != NULL) {
|
||||
if (b != nullptr) {
|
||||
b = ggml_reshape_4d(ctx, b, 1, 1, 1, b->ne[0]); // [OC, 1, 1, 1]
|
||||
x = ggml_add_inplace(ctx, x, b);
|
||||
}
|
||||
|
|
@ -1048,7 +1048,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_3d_nx1x1(struct ggml_context*
|
|||
int p2 = 1,
|
||||
int d2 = 1) {
|
||||
x = ggml_conv_2d(ctx, w, x, 1, s2, 0, p2, 1, d2); // [N, OC, T, OH * OW]
|
||||
if (b != NULL) {
|
||||
if (b != nullptr) {
|
||||
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
|
||||
x = ggml_add(ctx, x, b);
|
||||
}
|
||||
|
|
@ -1152,7 +1152,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
|
|||
struct ggml_tensor* k,
|
||||
struct ggml_tensor* v,
|
||||
int64_t n_head,
|
||||
struct ggml_tensor* mask = NULL,
|
||||
struct ggml_tensor* mask = nullptr,
|
||||
bool diag_mask_inf = false,
|
||||
bool skip_reshape = false,
|
||||
bool flash_attn = false, // avoid overflow
|
||||
|
|
@ -1299,9 +1299,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_layer_norm(struct ggml_context* ct
|
|||
struct ggml_tensor* b,
|
||||
float eps = EPS) {
|
||||
x = ggml_norm(ctx, x, eps);
|
||||
if (w != NULL) {
|
||||
if (w != nullptr) {
|
||||
x = ggml_mul_inplace(ctx, x, w);
|
||||
if (b != NULL) {
|
||||
if (b != nullptr) {
|
||||
x = ggml_add_inplace(ctx, x, b);
|
||||
}
|
||||
}
|
||||
|
|
@ -1313,14 +1313,14 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
|
|||
struct ggml_tensor* w,
|
||||
struct ggml_tensor* b,
|
||||
int num_groups = 32) {
|
||||
if (ggml_n_dims(x) >= 3 && w != NULL && b != NULL) {
|
||||
if (ggml_n_dims(x) >= 3 && w != nullptr && b != nullptr) {
|
||||
w = ggml_reshape_4d(ctx, w, 1, 1, w->ne[0], 1);
|
||||
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
|
||||
}
|
||||
|
||||
const float eps = 1e-6f; // default eps parameter
|
||||
x = ggml_group_norm(ctx, x, num_groups, eps);
|
||||
if (w != NULL && b != NULL) {
|
||||
if (w != nullptr && b != nullptr) {
|
||||
x = ggml_mul_inplace(ctx, x, w);
|
||||
// b = ggml_repeat(ctx, b, x);
|
||||
x = ggml_add_inplace(ctx, x, b);
|
||||
|
|
@ -1428,7 +1428,7 @@ __STATIC_INLINE__ struct ggml_tensor* new_timestep_embedding(struct ggml_context
|
|||
// embedding: [N, dim]
|
||||
std::vector<float> embedding_vec = timestep_embedding(timesteps, dim, max_period);
|
||||
struct ggml_tensor* embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps.size());
|
||||
if (embedding->data != NULL) {
|
||||
if (embedding->data != nullptr) {
|
||||
memcpy(((char*)embedding->data), ((char*)embedding_vec.data()), ggml_nbytes(embedding));
|
||||
} else {
|
||||
ggml_backend_tensor_set(embedding, embedding_vec.data(), 0, ggml_nbytes(embedding));
|
||||
|
|
@ -1464,23 +1464,23 @@ struct GGMLRunner {
|
|||
protected:
|
||||
typedef std::function<struct ggml_cgraph*()> get_graph_cb_t;
|
||||
|
||||
ggml_backend_t params_backend = NULL;
|
||||
ggml_backend_t runtime_backend = NULL;
|
||||
ggml_backend_t params_backend = nullptr;
|
||||
ggml_backend_t runtime_backend = nullptr;
|
||||
|
||||
struct ggml_context* params_ctx = NULL;
|
||||
ggml_backend_buffer_t params_buffer = NULL;
|
||||
struct ggml_context* offload_ctx = NULL;
|
||||
ggml_backend_buffer_t runtime_params_buffer = NULL;
|
||||
struct ggml_context* params_ctx = nullptr;
|
||||
ggml_backend_buffer_t params_buffer = nullptr;
|
||||
struct ggml_context* offload_ctx = nullptr;
|
||||
ggml_backend_buffer_t runtime_params_buffer = nullptr;
|
||||
bool params_on_runtime_backend = false;
|
||||
|
||||
struct ggml_context* cache_ctx = NULL;
|
||||
ggml_backend_buffer_t cache_buffer = NULL;
|
||||
struct ggml_context* cache_ctx = nullptr;
|
||||
ggml_backend_buffer_t cache_buffer = nullptr;
|
||||
|
||||
struct ggml_context* compute_ctx = NULL;
|
||||
struct ggml_gallocr* compute_allocr = NULL;
|
||||
struct ggml_context* compute_ctx = nullptr;
|
||||
struct ggml_gallocr* compute_allocr = nullptr;
|
||||
|
||||
std::vector<float> one_vec = {1.f};
|
||||
ggml_tensor* one_tensor = NULL;
|
||||
ggml_tensor* one_tensor = nullptr;
|
||||
|
||||
std::map<struct ggml_tensor*, const void*> backend_tensor_data_map;
|
||||
std::map<std::string, struct ggml_tensor*> cache_tensor_map; // name -> tensor
|
||||
|
|
@ -1489,59 +1489,59 @@ protected:
|
|||
void alloc_params_ctx() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(MAX_PARAMS_TENSOR_NUM * ggml_tensor_overhead());
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = true;
|
||||
|
||||
params_ctx = ggml_init(params);
|
||||
GGML_ASSERT(params_ctx != NULL);
|
||||
GGML_ASSERT(params_ctx != nullptr);
|
||||
if (params_backend != runtime_backend) {
|
||||
offload_ctx = ggml_init(params);
|
||||
GGML_ASSERT(offload_ctx != NULL);
|
||||
GGML_ASSERT(offload_ctx != nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
void free_params_ctx() {
|
||||
if (params_ctx != NULL) {
|
||||
if (params_ctx != nullptr) {
|
||||
ggml_free(params_ctx);
|
||||
params_ctx = NULL;
|
||||
params_ctx = nullptr;
|
||||
}
|
||||
if (offload_ctx != NULL) {
|
||||
if (offload_ctx != nullptr) {
|
||||
ggml_free(offload_ctx);
|
||||
offload_ctx = NULL;
|
||||
offload_ctx = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void alloc_cache_ctx() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(MAX_PARAMS_TENSOR_NUM * ggml_tensor_overhead());
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = true;
|
||||
|
||||
cache_ctx = ggml_init(params);
|
||||
GGML_ASSERT(cache_ctx != NULL);
|
||||
GGML_ASSERT(cache_ctx != nullptr);
|
||||
}
|
||||
|
||||
void free_cache_ctx() {
|
||||
if (cache_ctx != NULL) {
|
||||
if (cache_ctx != nullptr) {
|
||||
ggml_free(cache_ctx);
|
||||
cache_ctx = NULL;
|
||||
cache_ctx = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void alloc_compute_ctx() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(ggml_tensor_overhead() * MAX_GRAPH_SIZE + ggml_graph_overhead());
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = true;
|
||||
|
||||
compute_ctx = ggml_init(params);
|
||||
GGML_ASSERT(compute_ctx != NULL);
|
||||
GGML_ASSERT(compute_ctx != nullptr);
|
||||
}
|
||||
|
||||
void free_compute_ctx() {
|
||||
if (compute_ctx != NULL) {
|
||||
if (compute_ctx != nullptr) {
|
||||
ggml_free(compute_ctx);
|
||||
compute_ctx = NULL;
|
||||
compute_ctx = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1565,7 +1565,7 @@ protected:
|
|||
}
|
||||
|
||||
bool alloc_compute_buffer(get_graph_cb_t get_graph) {
|
||||
if (compute_allocr != NULL) {
|
||||
if (compute_allocr != nullptr) {
|
||||
return true;
|
||||
}
|
||||
reset_compute_ctx();
|
||||
|
|
@ -1590,9 +1590,9 @@ protected:
|
|||
}
|
||||
|
||||
void free_cache_buffer() {
|
||||
if (cache_buffer != NULL) {
|
||||
if (cache_buffer != nullptr) {
|
||||
ggml_backend_buffer_free(cache_buffer);
|
||||
cache_buffer = NULL;
|
||||
cache_buffer = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1602,7 +1602,7 @@ protected:
|
|||
}
|
||||
free_cache_ctx_and_buffer();
|
||||
alloc_cache_ctx();
|
||||
GGML_ASSERT(cache_buffer == NULL);
|
||||
GGML_ASSERT(cache_buffer == nullptr);
|
||||
std::map<ggml_tensor*, ggml_tensor*> runtime_tensor_to_cache_tensor;
|
||||
for (auto kv : cache_tensor_map) {
|
||||
auto cache_tensor = ggml_dup_tensor(cache_ctx, kv.second);
|
||||
|
|
@ -1611,7 +1611,7 @@ protected:
|
|||
}
|
||||
size_t num_tensors = ggml_tensor_num(cache_ctx);
|
||||
cache_buffer = ggml_backend_alloc_ctx_tensors(cache_ctx, runtime_backend);
|
||||
GGML_ASSERT(cache_buffer != NULL);
|
||||
GGML_ASSERT(cache_buffer != nullptr);
|
||||
for (auto kv : runtime_tensor_to_cache_tensor) {
|
||||
ggml_backend_tensor_copy(kv.first, kv.second);
|
||||
}
|
||||
|
|
@ -1643,12 +1643,12 @@ protected:
|
|||
if (params_on_runtime_backend) {
|
||||
return true;
|
||||
}
|
||||
GGML_ASSERT(runtime_params_buffer == NULL);
|
||||
GGML_ASSERT(runtime_params_buffer == nullptr);
|
||||
int64_t t0 = ggml_time_ms();
|
||||
size_t num_tensors = ggml_tensor_num(offload_ctx);
|
||||
if (num_tensors == 0) {
|
||||
for (ggml_tensor* t = ggml_get_first_tensor(params_ctx); t != NULL; t = ggml_get_next_tensor(params_ctx, t)) {
|
||||
GGML_ASSERT(t->view_src == NULL);
|
||||
for (ggml_tensor* t = ggml_get_first_tensor(params_ctx); t != nullptr; t = ggml_get_next_tensor(params_ctx, t)) {
|
||||
GGML_ASSERT(t->view_src == nullptr);
|
||||
ggml_dup_tensor(offload_ctx, t);
|
||||
}
|
||||
}
|
||||
|
|
@ -1657,7 +1657,7 @@ protected:
|
|||
|
||||
runtime_params_buffer = ggml_backend_alloc_ctx_tensors(offload_ctx, runtime_backend);
|
||||
|
||||
if (runtime_params_buffer == NULL) {
|
||||
if (runtime_params_buffer == nullptr) {
|
||||
LOG_ERROR("%s alloc runtime params backend buffer failed, num_tensors = %i",
|
||||
get_desc().c_str(),
|
||||
num_tensors);
|
||||
|
|
@ -1667,7 +1667,7 @@ protected:
|
|||
ggml_tensor* t = ggml_get_first_tensor(params_ctx);
|
||||
ggml_tensor* offload_t = ggml_get_first_tensor(offload_ctx);
|
||||
|
||||
while (t != NULL && offload_t != NULL) {
|
||||
while (t != nullptr && offload_t != nullptr) {
|
||||
ggml_backend_tensor_copy(t, offload_t);
|
||||
std::swap(t->buffer, offload_t->buffer);
|
||||
std::swap(t->data, offload_t->data);
|
||||
|
|
@ -1699,21 +1699,21 @@ protected:
|
|||
ggml_tensor* t = ggml_get_first_tensor(params_ctx);
|
||||
ggml_tensor* offload_t = ggml_get_first_tensor(offload_ctx);
|
||||
|
||||
while (t != NULL && offload_t != NULL) {
|
||||
while (t != nullptr && offload_t != nullptr) {
|
||||
t->buffer = offload_t->buffer;
|
||||
t->data = offload_t->data;
|
||||
t->extra = offload_t->extra;
|
||||
offload_t->buffer = NULL;
|
||||
offload_t->data = NULL;
|
||||
offload_t->extra = NULL;
|
||||
offload_t->buffer = nullptr;
|
||||
offload_t->data = nullptr;
|
||||
offload_t->extra = nullptr;
|
||||
|
||||
t = ggml_get_next_tensor(params_ctx, t);
|
||||
offload_t = ggml_get_next_tensor(offload_ctx, offload_t);
|
||||
}
|
||||
|
||||
if (runtime_params_buffer != NULL) {
|
||||
if (runtime_params_buffer != nullptr) {
|
||||
ggml_backend_buffer_free(runtime_params_buffer);
|
||||
runtime_params_buffer = NULL;
|
||||
runtime_params_buffer = nullptr;
|
||||
}
|
||||
params_on_runtime_backend = false;
|
||||
}
|
||||
|
|
@ -1750,7 +1750,7 @@ public:
|
|||
bool alloc_params_buffer() {
|
||||
size_t num_tensors = ggml_tensor_num(params_ctx);
|
||||
params_buffer = ggml_backend_alloc_ctx_tensors(params_ctx, params_backend);
|
||||
if (params_buffer == NULL) {
|
||||
if (params_buffer == nullptr) {
|
||||
LOG_ERROR("%s alloc params backend buffer failed, num_tensors = %i",
|
||||
get_desc().c_str(),
|
||||
num_tensors);
|
||||
|
|
@ -1766,14 +1766,14 @@ public:
|
|||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
if (params_buffer != NULL) {
|
||||
if (params_buffer != nullptr) {
|
||||
ggml_backend_buffer_free(params_buffer);
|
||||
params_buffer = NULL;
|
||||
params_buffer = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
if (params_buffer != NULL) {
|
||||
if (params_buffer != nullptr) {
|
||||
return ggml_backend_buffer_get_size(params_buffer);
|
||||
}
|
||||
return 0;
|
||||
|
|
@ -1785,9 +1785,9 @@ public:
|
|||
}
|
||||
|
||||
void free_compute_buffer() {
|
||||
if (compute_allocr != NULL) {
|
||||
if (compute_allocr != nullptr) {
|
||||
ggml_gallocr_free(compute_allocr);
|
||||
compute_allocr = NULL;
|
||||
compute_allocr = nullptr;
|
||||
}
|
||||
offload_params_to_params_backend();
|
||||
}
|
||||
|
|
@ -1798,12 +1798,12 @@ public:
|
|||
}
|
||||
|
||||
struct ggml_tensor* to_backend(struct ggml_tensor* tensor) {
|
||||
GGML_ASSERT(compute_ctx != NULL);
|
||||
if (tensor == NULL) {
|
||||
return NULL;
|
||||
GGML_ASSERT(compute_ctx != nullptr);
|
||||
if (tensor == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
// it's performing a compute, check if backend isn't cpu
|
||||
if (!ggml_backend_is_cpu(runtime_backend) && (tensor->buffer == NULL || ggml_backend_buffer_is_host(tensor->buffer))) {
|
||||
if (!ggml_backend_is_cpu(runtime_backend) && (tensor->buffer == nullptr || ggml_backend_buffer_is_host(tensor->buffer))) {
|
||||
// pass input tensors to gpu memory
|
||||
auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor);
|
||||
|
||||
|
|
@ -1819,8 +1819,8 @@ public:
|
|||
}
|
||||
|
||||
struct ggml_tensor* get_cache_tensor_by_name(const std::string& name) {
|
||||
if (cache_ctx == NULL) {
|
||||
return NULL;
|
||||
if (cache_ctx == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return ggml_get_tensor(cache_ctx, name.c_str());
|
||||
}
|
||||
|
|
@ -1828,8 +1828,8 @@ public:
|
|||
void compute(get_graph_cb_t get_graph,
|
||||
int n_threads,
|
||||
bool free_compute_buffer_immediately = true,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) {
|
||||
if (!offload_params_to_runtime_backend()) {
|
||||
LOG_ERROR("%s offload params to runtime backend failed", get_desc().c_str());
|
||||
return;
|
||||
|
|
@ -1848,12 +1848,12 @@ public:
|
|||
ggml_graph_print(gf);
|
||||
#endif
|
||||
copy_cache_tensors_to_cache_buffer();
|
||||
if (output != NULL) {
|
||||
if (output != nullptr) {
|
||||
auto result = ggml_get_tensor(compute_ctx, final_result_name.c_str());
|
||||
if (*output == NULL && output_ctx != NULL) {
|
||||
if (*output == nullptr && output_ctx != nullptr) {
|
||||
*output = ggml_dup_tensor(output_ctx, result);
|
||||
}
|
||||
if (*output != NULL) {
|
||||
if (*output != nullptr) {
|
||||
ggml_backend_tensor_get_and_sync(runtime_backend, result, (*output)->data, 0, ggml_nbytes(*output));
|
||||
}
|
||||
}
|
||||
|
|
@ -2000,7 +2000,7 @@ public:
|
|||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
struct ggml_tensor* b = NULL;
|
||||
struct ggml_tensor* b = nullptr;
|
||||
if (bias) {
|
||||
b = params["bias"];
|
||||
}
|
||||
|
|
@ -2104,7 +2104,7 @@ public:
|
|||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
struct ggml_tensor* b = NULL;
|
||||
struct ggml_tensor* b = nullptr;
|
||||
if (bias) {
|
||||
b = params["bias"];
|
||||
}
|
||||
|
|
@ -2162,7 +2162,7 @@ public:
|
|||
// result: [N, OC, OD, OH*OW]
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
struct ggml_tensor* b = NULL;
|
||||
struct ggml_tensor* b = nullptr;
|
||||
if (bias) {
|
||||
b = params["bias"];
|
||||
}
|
||||
|
|
@ -2211,7 +2211,7 @@ public:
|
|||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
struct ggml_tensor* b = NULL;
|
||||
struct ggml_tensor* b = nullptr;
|
||||
if (bias) {
|
||||
b = params["bias"];
|
||||
}
|
||||
|
|
@ -2251,8 +2251,8 @@ public:
|
|||
bias(bias) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* w = NULL;
|
||||
struct ggml_tensor* b = NULL;
|
||||
struct ggml_tensor* w = nullptr;
|
||||
struct ggml_tensor* b = nullptr;
|
||||
|
||||
if (elementwise_affine) {
|
||||
w = params["weight"];
|
||||
|
|
@ -2291,8 +2291,8 @@ public:
|
|||
affine(affine) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* w = NULL;
|
||||
struct ggml_tensor* b = NULL;
|
||||
struct ggml_tensor* w = nullptr;
|
||||
struct ggml_tensor* b = nullptr;
|
||||
if (affine) {
|
||||
w = params["weight"];
|
||||
b = params["bias"];
|
||||
|
|
@ -2375,7 +2375,7 @@ public:
|
|||
struct ggml_tensor* k = k_proj->forward(ctx, x);
|
||||
struct ggml_tensor* v = v_proj->forward(ctx, x);
|
||||
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, NULL, mask); // [N, n_token, embed_dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, nullptr, mask); // [N, n_token, embed_dim]
|
||||
|
||||
x = out_proj->forward(ctx, x); // [N, n_token, embed_dim]
|
||||
return x;
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ struct LoraModel : public GGMLRunner {
|
|||
bool load_failed = false;
|
||||
bool applied = false;
|
||||
std::vector<int> zero_index_vec = {0};
|
||||
ggml_tensor* zero_index = NULL;
|
||||
ggml_tensor* zero_index = nullptr;
|
||||
enum lora_t type = REGULAR;
|
||||
|
||||
LoraModel(ggml_backend_t backend,
|
||||
|
|
@ -112,7 +112,7 @@ struct LoraModel : public GGMLRunner {
|
|||
}
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "lora";
|
||||
}
|
||||
|
||||
|
|
@ -287,7 +287,7 @@ struct LoraModel : public GGMLRunner {
|
|||
if (is_qkvm_split) {
|
||||
key = key.substr(sizeof("SPLIT_L|") - 1);
|
||||
}
|
||||
struct ggml_tensor* updown = NULL;
|
||||
struct ggml_tensor* updown = nullptr;
|
||||
float scale_value = 1.0f;
|
||||
std::string full_key = lora_pre[type] + key;
|
||||
if (is_bias) {
|
||||
|
|
@ -314,13 +314,13 @@ struct LoraModel : public GGMLRunner {
|
|||
}
|
||||
std::string alpha_name = "";
|
||||
|
||||
ggml_tensor* hada_1_mid = NULL; // tau for tucker decomposition
|
||||
ggml_tensor* hada_1_up = NULL;
|
||||
ggml_tensor* hada_1_down = NULL;
|
||||
ggml_tensor* hada_1_mid = nullptr; // tau for tucker decomposition
|
||||
ggml_tensor* hada_1_up = nullptr;
|
||||
ggml_tensor* hada_1_down = nullptr;
|
||||
|
||||
ggml_tensor* hada_2_mid = NULL; // tau for tucker decomposition
|
||||
ggml_tensor* hada_2_up = NULL;
|
||||
ggml_tensor* hada_2_down = NULL;
|
||||
ggml_tensor* hada_2_mid = nullptr; // tau for tucker decomposition
|
||||
ggml_tensor* hada_2_up = nullptr;
|
||||
ggml_tensor* hada_2_down = nullptr;
|
||||
|
||||
std::string hada_1_mid_name = "";
|
||||
std::string hada_1_down_name = "";
|
||||
|
|
@ -368,7 +368,7 @@ struct LoraModel : public GGMLRunner {
|
|||
applied_lora_tensors.insert(hada_2_up_name);
|
||||
|
||||
applied_lora_tensors.insert(alpha_name);
|
||||
if (hada_1_up == NULL || hada_1_down == NULL || hada_2_up == NULL || hada_2_down == NULL) {
|
||||
if (hada_1_up == nullptr || hada_1_down == nullptr || hada_2_up == nullptr || hada_2_down == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -394,8 +394,8 @@ struct LoraModel : public GGMLRunner {
|
|||
|
||||
std::string alpha_name = full_key + ".alpha";
|
||||
|
||||
ggml_tensor* lokr_w1 = NULL;
|
||||
ggml_tensor* lokr_w2 = NULL;
|
||||
ggml_tensor* lokr_w1 = nullptr;
|
||||
ggml_tensor* lokr_w2 = nullptr;
|
||||
|
||||
std::string lokr_w1_name = "";
|
||||
std::string lokr_w2_name = "";
|
||||
|
|
@ -407,8 +407,8 @@ struct LoraModel : public GGMLRunner {
|
|||
lokr_w1 = to_f32(compute_ctx, lora_tensors[lokr_w1_name]);
|
||||
applied_lora_tensors.insert(lokr_w1_name);
|
||||
} else {
|
||||
ggml_tensor* down = NULL;
|
||||
ggml_tensor* up = NULL;
|
||||
ggml_tensor* down = nullptr;
|
||||
ggml_tensor* up = nullptr;
|
||||
std::string down_name = lokr_w1_name + "_b";
|
||||
std::string up_name = lokr_w1_name + "_a";
|
||||
if (lora_tensors.find(down_name) != lora_tensors.end()) {
|
||||
|
|
@ -432,8 +432,8 @@ struct LoraModel : public GGMLRunner {
|
|||
lokr_w2 = to_f32(compute_ctx, lora_tensors[lokr_w2_name]);
|
||||
applied_lora_tensors.insert(lokr_w2_name);
|
||||
} else {
|
||||
ggml_tensor* down = NULL;
|
||||
ggml_tensor* up = NULL;
|
||||
ggml_tensor* down = nullptr;
|
||||
ggml_tensor* up = nullptr;
|
||||
std::string down_name = lokr_w2_name + "_b";
|
||||
std::string up_name = lokr_w2_name + "_a";
|
||||
if (lora_tensors.find(down_name) != lora_tensors.end()) {
|
||||
|
|
@ -460,9 +460,9 @@ struct LoraModel : public GGMLRunner {
|
|||
|
||||
} else {
|
||||
// LoRA mode
|
||||
ggml_tensor* lora_mid = NULL; // tau for tucker decomposition
|
||||
ggml_tensor* lora_up = NULL;
|
||||
ggml_tensor* lora_down = NULL;
|
||||
ggml_tensor* lora_mid = nullptr; // tau for tucker decomposition
|
||||
ggml_tensor* lora_up = nullptr;
|
||||
ggml_tensor* lora_down = nullptr;
|
||||
|
||||
std::string alpha_name = "";
|
||||
std::string scale_name = "";
|
||||
|
|
@ -497,12 +497,12 @@ struct LoraModel : public GGMLRunner {
|
|||
auto split_k_alpha_name = full_key + "k" + suffix + ".alpha";
|
||||
auto split_v_alpha_name = full_key + "v" + suffix + ".alpha";
|
||||
|
||||
ggml_tensor* lora_q_down = NULL;
|
||||
ggml_tensor* lora_q_up = NULL;
|
||||
ggml_tensor* lora_k_down = NULL;
|
||||
ggml_tensor* lora_k_up = NULL;
|
||||
ggml_tensor* lora_v_down = NULL;
|
||||
ggml_tensor* lora_v_up = NULL;
|
||||
ggml_tensor* lora_q_down = nullptr;
|
||||
ggml_tensor* lora_q_up = nullptr;
|
||||
ggml_tensor* lora_k_down = nullptr;
|
||||
ggml_tensor* lora_k_up = nullptr;
|
||||
ggml_tensor* lora_v_down = nullptr;
|
||||
ggml_tensor* lora_v_up = nullptr;
|
||||
|
||||
lora_q_down = to_f32(compute_ctx, lora_tensors[split_q_d_name]);
|
||||
|
||||
|
|
@ -633,15 +633,15 @@ struct LoraModel : public GGMLRunner {
|
|||
auto split_v_alpha_name = full_key + "attn.to_v" + ".alpha";
|
||||
auto split_m_alpha_name = full_key + "proj_mlp" + ".alpha";
|
||||
|
||||
ggml_tensor* lora_q_down = NULL;
|
||||
ggml_tensor* lora_q_up = NULL;
|
||||
ggml_tensor* lora_k_down = NULL;
|
||||
ggml_tensor* lora_k_up = NULL;
|
||||
ggml_tensor* lora_v_down = NULL;
|
||||
ggml_tensor* lora_v_up = NULL;
|
||||
ggml_tensor* lora_q_down = nullptr;
|
||||
ggml_tensor* lora_q_up = nullptr;
|
||||
ggml_tensor* lora_k_down = nullptr;
|
||||
ggml_tensor* lora_k_up = nullptr;
|
||||
ggml_tensor* lora_v_down = nullptr;
|
||||
ggml_tensor* lora_v_up = nullptr;
|
||||
|
||||
ggml_tensor* lora_m_down = NULL;
|
||||
ggml_tensor* lora_m_up = NULL;
|
||||
ggml_tensor* lora_m_down = nullptr;
|
||||
ggml_tensor* lora_m_up = nullptr;
|
||||
|
||||
lora_q_up = to_f32(compute_ctx, lora_tensors[split_q_u_name]);
|
||||
|
||||
|
|
@ -809,7 +809,7 @@ struct LoraModel : public GGMLRunner {
|
|||
}
|
||||
}
|
||||
|
||||
if (lora_up == NULL || lora_down == NULL) {
|
||||
if (lora_up == nullptr || lora_down == nullptr) {
|
||||
continue;
|
||||
}
|
||||
// calc_scale
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@ namespace LTXV {
|
|||
public:
|
||||
CausalConv3d(int64_t in_channels,
|
||||
int64_t out_channels,
|
||||
int kernel_size = 3,
|
||||
std::tuple<int> stride = {1, 1, 1},
|
||||
int dilation = 1,
|
||||
bool bias = true) {
|
||||
int kernel_size = 3,
|
||||
std::tuple<int, int, int> stride = {1, 1, 1},
|
||||
int dilation = 1,
|
||||
bool bias = true) {
|
||||
time_kernel_size = kernel_size / 2;
|
||||
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv3d(in_channels,
|
||||
out_channels,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <map>
|
||||
#include <random>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
|
@ -213,119 +214,6 @@ void print_params(SDParams params) {
|
|||
free(high_noise_sample_params_str);
|
||||
}
|
||||
|
||||
/**
 * Print the static command-line help text to stdout.
 *
 * @param argc  Number of entries in argv (not used by this function).
 * @param argv  Program arguments; argv[0] is printed as the program name.
 *
 * Fix: `--end-img` was advertised with the short flag `-i`, which belongs to
 * `--init-img`; `--end-img` has no short form, so the bogus alias is removed.
 */
void print_usage(int argc, const char* argv[]) {
    printf("usage: %s [arguments]\n", argv[0]);
    printf("\n");
    printf("arguments:\n");
    printf(" -h, --help show this help message and exit\n");
    printf(" -M, --mode [MODE] run mode, one of: [img_gen, vid_gen, upscale, convert], default: img_gen\n");
    printf(" -t, --threads N number of threads to use during computation (default: -1)\n");
    printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
    printf(" --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed\n");

    // Model and auxiliary weight paths.
    printf(" -m, --model [MODEL] path to full model\n");
    printf(" --diffusion-model path to the standalone diffusion model\n");
    printf(" --high-noise-diffusion-model path to the standalone high noise diffusion model\n");
    printf(" --clip_l path to the clip-l text encoder\n");
    printf(" --clip_g path to the clip-g text encoder\n");
    printf(" --clip_vision path to the clip-vision encoder\n");
    printf(" --t5xxl path to the t5xxl text encoder\n");
    printf(" --qwen2vl path to the qwen2vl text encoder\n");
    printf(" --qwen2vl_vision path to the qwen2vl vit\n");
    printf(" --vae [VAE] path to vae\n");
    printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n");
    printf(" --control-net [CONTROL_PATH] path to control net model\n");
    printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n");
    printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n");
    printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n");
    printf(" --type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n");
    printf(" If not specified, the default is the type of the weight file\n");
    printf(" --tensor-type-rules [EXPRESSION] weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")\n");
    printf(" --lora-model-dir [DIR] lora model directory\n");

    // Input / output images and prompts.
    printf(" -i, --init-img [IMAGE] path to the init image, required by img2img\n");
    printf(" --mask [MASK] path to the mask image, required by img2img with mask\n");
    // No short alias here: `-i` is already taken by --init-img.
    printf(" --end-img [IMAGE] path to the end image, required by flf2v\n");
    printf(" --control-image [IMAGE] path to image condition, control net\n");
    printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
    printf(" --disable-auto-resize-ref-image disable auto resize of ref images\n");
    printf(" --control-video [PATH] path to control video frames, It must be a directory path.\n");
    printf(" The video frames inside should be stored as images in lexicographical (character) order\n");
    printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc.\n");
    printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
    printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
    printf(" -p, --prompt [PROMPT] the prompt to render\n");
    printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");

    // Sampling parameters.
    printf(" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n");
    printf(" --img-cfg-scale SCALE image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
    printf(" --guidance SCALE distilled guidance scale for models with guidance input (default: 3.5)\n");
    printf(" --slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
    printf(" 0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
    printf(" --eta SCALE eta in DDIM, only for DDIM and TCD: (default: 0)\n");
    printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
    printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
    printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
    printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
    printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
    printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
    printf(" --timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
    printf(" --steps STEPS number of sample steps (default: 20)\n");

    // High-noise counterparts of the sampling parameters.
    printf(" --high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)\n");
    printf(" --high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
    printf(" --high-noise-guidance SCALE (high noise) distilled guidance scale for models with guidance input (default: 3.5)\n");
    printf(" --high-noise-slg-scale SCALE (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
    printf(" 0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
    printf(" --high-noise-eta SCALE (high noise) eta in DDIM, only for DDIM and TCD: (default: 0)\n");
    printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
    printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
    printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
    printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
    printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
    printf(" (high noise) sampling method (default: \"euler_a\")\n");
    printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
    printf(" SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])\n");

    // Image geometry, RNG and batching.
    printf(" --strength STRENGTH strength for noising/unnoising (default: 0.75)\n");
    printf(" --control-strength STRENGTH strength to apply Control Net (default: 0.9)\n");
    printf(" 1.0 corresponds to full destruction of information in init image\n");
    printf(" -H, --height H image height, in pixel space (default: 512)\n");
    printf(" -W, --width W image width, in pixel space (default: 512)\n");
    printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
    printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
    printf(" -b, --batch-count COUNT number of images to generate\n");
    printf(" --prediction {eps, v, edm_v, sd3_flow, flux_flow} Prediction type override.\n");
    printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
    printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");

    // VAE tiling and memory/placement switches.
    printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
    printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
    printf(" --vae-relative-tile-size [X]x[Y] relative tile size for vae tiling, in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)\n");
    printf(" --vae-tile-overlap OVERLAP tile overlap for vae tiling, in fraction of tile size (default: 0.5)\n");
    printf(" --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae\n");
    printf(" --vae-on-cpu keep vae in cpu (for low vram)\n");
    printf(" --clip-on-cpu keep clip in cpu (for low vram)\n");
    printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n");
    printf(" Might lower quality, since it implies converting k and v to f16.\n");
    printf(" This might crash if it is not supported by the backend.\n");
    printf(" --diffusion-conv-direct use Conv2d direct in the diffusion model\n");
    printf(" This might crash if it is not supported by the backend.\n");
    printf(" --vae-conv-direct use Conv2d direct in the vae model (should improve the performance)\n");
    printf(" This might crash if it is not supported by the backend.\n");
    printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
    printf(" --canny apply canny preprocessor (edge detection)\n");
    printf(" --color colors the logging tags according to level\n");

    // Model-family specific options.
    printf(" --chroma-disable-dit-mask disable dit mask for chroma\n");
    printf(" --chroma-enable-t5-mask enable t5 mask for chroma\n");
    printf(" --chroma-t5-mask-pad PAD_SIZE t5 mask pad size of chroma\n");
    printf(" --video-frames video frames (default: 1)\n");
    printf(" --fps fps (default: 24)\n");
    printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875)\n");
    printf(" only enabled if `--high-noise-steps` is set to -1\n");
    printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n");
    printf(" --vace-strength wan vace strength\n");
    printf(" --photo-maker path to PHOTOMAKER model\n");
    printf(" --pm-id-images-dir [DIR] path to PHOTOMAKER input id images dir\n");
    printf(" --pm-id-embed-path [PATH] path to PHOTOMAKER v2 id embed\n");
    printf(" --pm-style-strength strength for keeping PHOTOMAKER input identity (default: 20)\n");
    printf(" -v, --verbose print extra info\n");
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
static std::string utf16_to_utf8(const std::wstring& wstr) {
|
||||
if (wstr.empty())
|
||||
|
|
@ -495,94 +383,424 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Word-wrap `text` so that no output line is longer than `width` columns.
 *
 * The first line is assumed to start at column 0; every line started by this
 * function (after a wrap or after a '\n' already present in `text`) is prefixed
 * with `indent` spaces, and those spaces count towards `width`.
 *
 * Breaking rules:
 *  - A line that reaches `width` columns is broken at the last space that came
 *    from `text` on that line (the space itself is dropped), provided the word
 *    being carried over fits on an indented continuation line.
 *  - Otherwise the line is kept full and the break is inserted in front of the
 *    next character (hard break inside the word).
 *  - Manual '\n' characters are preserved and followed by the indent.
 *
 * @param text    Text to wrap; may contain '\n'.
 * @param width   Maximum line width in columns. If it does not exceed `indent`,
 *                `indent + 1` is used so that every line can hold one character.
 * @param indent  Number of spaces inserted after every line break.
 * @return        The wrapped text; no trailing line break is appended.
 */
static std::string wrap_text(const std::string& text, size_t width, size_t indent) {
    const std::string line_break = "\n" + std::string(indent, ' ');
    // Guarantee room for at least one character after the indent.
    const size_t limit = width > indent ? width : indent + 1;

    std::string out;
    out.reserve(text.size() + line_break.size());

    size_t line_len   = 0;                  // columns used on the current output line
    size_t last_space = std::string::npos;  // index in `out` of the last breakable space on this line

    for (const char ch : text) {
        if (ch == '\n') {
            // Manual newline: start a fresh indented line.
            out += line_break;
            line_len   = indent;
            last_space = std::string::npos;
            continue;
        }

        // Deferred hard break: the current line is full and could not be
        // broken at a space. Emitting the break only when another character
        // arrives avoids a dangling break at the end of the text.
        if (line_len >= limit) {
            out += line_break;
            line_len   = indent;
            last_space = std::string::npos;
        }

        // Only spaces that come from `text` are break candidates; the indent
        // padding is never recorded here.
        if (ch == ' ') {
            last_space = out.size();
        }
        out += ch;
        ++line_len;

        if (line_len >= limit && last_space != std::string::npos) {
            const size_t carried = out.size() - last_space - 1;
            if (indent + carried < limit) {
                // Turn the space into a line break; the word after it moves to
                // the new line and its length is accounted for.
                out.replace(last_space, 1, line_break);
                line_len   = indent + carried;
                last_space = std::string::npos;
            }
            // Otherwise the carried word would not fit on a continuation line
            // either: keep the line full and let the deferred hard break split it.
        }
    }

    return out;
}
|
||||
|
||||
void print_usage(int argc, const char* argv[], const ArgOptions& options) {
|
||||
constexpr size_t max_line_width = 120;
|
||||
|
||||
std::cout << "Usage: " << argv[0] << " [options]\n\n";
|
||||
std::cout << "Options:\n";
|
||||
|
||||
struct Entry {
|
||||
std::string names;
|
||||
std::string desc;
|
||||
};
|
||||
std::vector<Entry> entries;
|
||||
|
||||
auto add_entry = [&](const std::string& s, const std::string& l,
|
||||
const std::string& desc, const std::string& hint = "") {
|
||||
std::ostringstream ss;
|
||||
if (!s.empty())
|
||||
ss << s;
|
||||
if (!s.empty() && !l.empty())
|
||||
ss << ", ";
|
||||
if (!l.empty())
|
||||
ss << l;
|
||||
if (!hint.empty())
|
||||
ss << " " << hint;
|
||||
entries.push_back({ss.str(), desc});
|
||||
};
|
||||
|
||||
for (auto& o : options.string_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "<string>");
|
||||
for (auto& o : options.int_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "<int>");
|
||||
for (auto& o : options.float_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "<float>");
|
||||
for (auto& o : options.bool_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "");
|
||||
for (auto& o : options.manual_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc);
|
||||
|
||||
size_t max_name_width = 0;
|
||||
for (auto& e : entries)
|
||||
max_name_width = std::max(max_name_width, e.names.size());
|
||||
|
||||
for (auto& e : entries) {
|
||||
size_t indent = 2 + max_name_width + 4;
|
||||
size_t desc_width = (max_line_width > indent ? max_line_width - indent : 40);
|
||||
std::string wrapped_desc = wrap_text(e.desc, max_line_width, indent);
|
||||
std::cout << " " << std::left << std::setw(static_cast<int>(max_name_width) + 4)
|
||||
<< e.names << wrapped_desc << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
ArgOptions options;
|
||||
options.string_options = {
|
||||
{"-m", "--model", "", ¶ms.model_path},
|
||||
{"", "--clip_l", "", ¶ms.clip_l_path},
|
||||
{"", "--clip_g", "", ¶ms.clip_g_path},
|
||||
{"", "--clip_vision", "", ¶ms.clip_vision_path},
|
||||
{"", "--t5xxl", "", ¶ms.t5xxl_path},
|
||||
{"", "--qwen2vl", "", ¶ms.qwen2vl_path},
|
||||
{"", "--qwen2vl_vision", "", ¶ms.qwen2vl_vision_path},
|
||||
{"", "--diffusion-model", "", ¶ms.diffusion_model_path},
|
||||
{"", "--high-noise-diffusion-model", "", ¶ms.high_noise_diffusion_model_path},
|
||||
{"", "--vae", "", ¶ms.vae_path},
|
||||
{"", "--taesd", "", ¶ms.taesd_path},
|
||||
{"", "--control-net", "", ¶ms.control_net_path},
|
||||
{"", "--embd-dir", "", ¶ms.embedding_dir},
|
||||
{"", "--lora-model-dir", "", ¶ms.lora_model_dir},
|
||||
{"-i", "--init-img", "", ¶ms.init_image_path},
|
||||
{"", "--end-img", "", ¶ms.end_image_path},
|
||||
{"", "--tensor-type-rules", "", ¶ms.tensor_type_rules},
|
||||
{"", "--photo-maker", "", ¶ms.photo_maker_path},
|
||||
{"", "--pm-id-images-dir", "", ¶ms.pm_id_images_dir},
|
||||
{"", "--pm-id-embed-path", "", ¶ms.pm_id_embed_path},
|
||||
{"", "--mask", "", ¶ms.mask_image_path},
|
||||
{"", "--control-image", "", ¶ms.control_image_path},
|
||||
{"", "--control-video", "", ¶ms.control_video_path},
|
||||
{"-o", "--output", "", ¶ms.output_path},
|
||||
{"-p", "--prompt", "", ¶ms.prompt},
|
||||
{"-n", "--negative-prompt", "", ¶ms.negative_prompt},
|
||||
{"", "--upscale-model", "", ¶ms.esrgan_path},
|
||||
{"-m",
|
||||
"--model",
|
||||
"path to full model",
|
||||
¶ms.model_path},
|
||||
{"",
|
||||
"--clip_l",
|
||||
"path to the clip-l text encoder", ¶ms.clip_l_path},
|
||||
{"", "--clip_g",
|
||||
"path to the clip-g text encoder",
|
||||
¶ms.clip_g_path},
|
||||
{"",
|
||||
"--clip_vision",
|
||||
"path to the clip-vision encoder",
|
||||
¶ms.clip_vision_path},
|
||||
{"",
|
||||
"--t5xxl",
|
||||
"path to the t5xxl text encoder",
|
||||
¶ms.t5xxl_path},
|
||||
{"",
|
||||
"--qwen2vl",
|
||||
"path to the qwen2vl text encoder",
|
||||
¶ms.qwen2vl_path},
|
||||
{"",
|
||||
"--qwen2vl_vision",
|
||||
"path to the qwen2vl vit",
|
||||
¶ms.qwen2vl_vision_path},
|
||||
{"",
|
||||
"--diffusion-model",
|
||||
"path to the standalone diffusion model",
|
||||
¶ms.diffusion_model_path},
|
||||
{"",
|
||||
"--high-noise-diffusion-model",
|
||||
"path to the standalone high noise diffusion model",
|
||||
¶ms.high_noise_diffusion_model_path},
|
||||
{"",
|
||||
"--vae",
|
||||
"path to standalone vae model",
|
||||
¶ms.vae_path},
|
||||
{"",
|
||||
"--taesd",
|
||||
"path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)",
|
||||
¶ms.taesd_path},
|
||||
{"",
|
||||
"--control-net",
|
||||
"path to control net model",
|
||||
¶ms.control_net_path},
|
||||
{"",
|
||||
"--embd-dir",
|
||||
"embeddings directory",
|
||||
¶ms.embedding_dir},
|
||||
{"",
|
||||
"--lora-model-dir",
|
||||
"lora model directory",
|
||||
¶ms.lora_model_dir},
|
||||
{"-i",
|
||||
"--init-img",
|
||||
"path to the init image",
|
||||
¶ms.init_image_path},
|
||||
{"",
|
||||
"--end-img",
|
||||
"path to the end image, required by flf2v",
|
||||
¶ms.end_image_path},
|
||||
{"",
|
||||
"--tensor-type-rules",
|
||||
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
|
||||
¶ms.tensor_type_rules},
|
||||
{"",
|
||||
"--photo-maker",
|
||||
"path to PHOTOMAKER model",
|
||||
¶ms.photo_maker_path},
|
||||
{"",
|
||||
"--pm-id-images-dir",
|
||||
"path to PHOTOMAKER input id images dir",
|
||||
¶ms.pm_id_images_dir},
|
||||
{"",
|
||||
"--pm-id-embed-path",
|
||||
"path to PHOTOMAKER v2 id embed",
|
||||
¶ms.pm_id_embed_path},
|
||||
{"",
|
||||
"--mask",
|
||||
"path to the mask image",
|
||||
¶ms.mask_image_path},
|
||||
{"",
|
||||
"--control-image",
|
||||
"path to control image, control net",
|
||||
¶ms.control_image_path},
|
||||
{"",
|
||||
"--control-video",
|
||||
"path to control video frames, It must be a directory path. The video frames inside should be stored as images in "
|
||||
"lexicographical (character) order. For example, if the control video path is `frames`, the directory contain images "
|
||||
"such as 00.png, 01.png, ... etc.",
|
||||
¶ms.control_video_path},
|
||||
{"-o",
|
||||
"--output",
|
||||
"path to write result image to (default: ./output.png)",
|
||||
¶ms.output_path},
|
||||
{"-p",
|
||||
"--prompt",
|
||||
"the prompt to render",
|
||||
¶ms.prompt},
|
||||
{"-n",
|
||||
"--negative-prompt",
|
||||
"the negative prompt (default: \"\")",
|
||||
¶ms.negative_prompt},
|
||||
{"",
|
||||
"--upscale-model",
|
||||
"path to esrgan model.",
|
||||
¶ms.esrgan_path},
|
||||
};
|
||||
|
||||
options.int_options = {
|
||||
{"-t", "--threads", "", ¶ms.n_threads},
|
||||
{"", "--upscale-repeats", "", ¶ms.upscale_repeats},
|
||||
{"-H", "--height", "", ¶ms.height},
|
||||
{"-W", "--width", "", ¶ms.width},
|
||||
{"", "--steps", "", ¶ms.sample_params.sample_steps},
|
||||
{"", "--high-noise-steps", "", ¶ms.high_noise_sample_params.sample_steps},
|
||||
{"", "--clip-skip", "", ¶ms.clip_skip},
|
||||
{"-b", "--batch-count", "", ¶ms.batch_count},
|
||||
{"", "--chroma-t5-mask-pad", "", ¶ms.chroma_t5_mask_pad},
|
||||
{"", "--video-frames", "", ¶ms.video_frames},
|
||||
{"", "--fps", "", ¶ms.fps},
|
||||
{"", "--timestep-shift", "", ¶ms.sample_params.shifted_timestep},
|
||||
{"-t",
|
||||
"--threads",
|
||||
"number of threads to use during computation (default: -1). "
|
||||
"If threads <= 0, then threads will be set to the number of CPU physical cores",
|
||||
¶ms.n_threads},
|
||||
{"",
|
||||
"--upscale-repeats",
|
||||
"Run the ESRGAN upscaler this many times (default: 1)",
|
||||
¶ms.upscale_repeats},
|
||||
{"-H",
|
||||
"--height",
|
||||
"image height, in pixel space (default: 512)",
|
||||
¶ms.height},
|
||||
{"-W",
|
||||
"--width",
|
||||
"image width, in pixel space (default: 512)",
|
||||
¶ms.width},
|
||||
{"",
|
||||
"--steps",
|
||||
"number of sample steps (default: 20)",
|
||||
¶ms.sample_params.sample_steps},
|
||||
{"",
|
||||
"--high-noise-steps",
|
||||
"(high noise) number of sample steps (default: -1 = auto)",
|
||||
¶ms.high_noise_sample_params.sample_steps},
|
||||
{"",
|
||||
"--clip-skip",
|
||||
"ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). "
|
||||
"<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x",
|
||||
¶ms.clip_skip},
|
||||
{"-b",
|
||||
"--batch-count",
|
||||
"batch count",
|
||||
¶ms.batch_count},
|
||||
{"",
|
||||
"--chroma-t5-mask-pad",
|
||||
"t5 mask pad size of chroma",
|
||||
¶ms.chroma_t5_mask_pad},
|
||||
{"",
|
||||
"--video-frames",
|
||||
"video frames (default: 1)",
|
||||
¶ms.video_frames},
|
||||
{"",
|
||||
"--fps",
|
||||
"fps (default: 24)",
|
||||
¶ms.fps},
|
||||
{"",
|
||||
"--timestep-shift",
|
||||
"shift timestep for NitroFusion models (default: 0). "
|
||||
"recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant",
|
||||
¶ms.sample_params.shifted_timestep},
|
||||
};
|
||||
|
||||
options.float_options = {
|
||||
{"", "--cfg-scale", "", ¶ms.sample_params.guidance.txt_cfg},
|
||||
{"", "--img-cfg-scale", "", ¶ms.sample_params.guidance.img_cfg},
|
||||
{"", "--guidance", "", ¶ms.sample_params.guidance.distilled_guidance},
|
||||
{"", "--slg-scale", "", ¶ms.sample_params.guidance.slg.scale},
|
||||
{"", "--skip-layer-start", "", ¶ms.sample_params.guidance.slg.layer_start},
|
||||
{"", "--skip-layer-end", "", ¶ms.sample_params.guidance.slg.layer_end},
|
||||
{"", "--eta", "", ¶ms.sample_params.eta},
|
||||
{"", "--high-noise-cfg-scale", "", ¶ms.high_noise_sample_params.guidance.txt_cfg},
|
||||
{"", "--high-noise-img-cfg-scale", "", ¶ms.high_noise_sample_params.guidance.img_cfg},
|
||||
{"", "--high-noise-guidance", "", ¶ms.high_noise_sample_params.guidance.distilled_guidance},
|
||||
{"", "--high-noise-slg-scale", "", ¶ms.high_noise_sample_params.guidance.slg.scale},
|
||||
{"", "--high-noise-skip-layer-start", "", ¶ms.high_noise_sample_params.guidance.slg.layer_start},
|
||||
{"", "--high-noise-skip-layer-end", "", ¶ms.high_noise_sample_params.guidance.slg.layer_end},
|
||||
{"", "--high-noise-eta", "", ¶ms.high_noise_sample_params.eta},
|
||||
{"", "--strength", "", ¶ms.strength},
|
||||
{"", "--pm-style-strength", "", ¶ms.pm_style_strength},
|
||||
{"", "--control-strength", "", ¶ms.control_strength},
|
||||
{"", "--moe-boundary", "", ¶ms.moe_boundary},
|
||||
{"", "--flow-shift", "", ¶ms.flow_shift},
|
||||
{"", "--vace-strength", "", ¶ms.vace_strength},
|
||||
{"", "--vae-tile-overlap", "", ¶ms.vae_tiling_params.target_overlap},
|
||||
{"",
|
||||
"--cfg-scale",
|
||||
"unconditional guidance scale: (default: 7.0)",
|
||||
¶ms.sample_params.guidance.txt_cfg},
|
||||
{"",
|
||||
"--img-cfg-scale",
|
||||
"image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)",
|
||||
¶ms.sample_params.guidance.img_cfg},
|
||||
{"",
|
||||
"--guidance",
|
||||
"distilled guidance scale for models with guidance input (default: 3.5)",
|
||||
¶ms.sample_params.guidance.distilled_guidance},
|
||||
{"",
|
||||
"--slg-scale",
|
||||
"skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5 medium",
|
||||
¶ms.sample_params.guidance.slg.scale},
|
||||
{"",
|
||||
"--skip-layer-start",
|
||||
"SLG enabling point (default: 0.01)",
|
||||
¶ms.sample_params.guidance.slg.layer_start},
|
||||
{"",
|
||||
"--skip-layer-end",
|
||||
"SLG disabling point (default: 0.2)",
|
||||
¶ms.sample_params.guidance.slg.layer_end},
|
||||
{"",
|
||||
"--eta",
|
||||
"eta in DDIM, only for DDIM and TCD (default: 0)",
|
||||
¶ms.sample_params.eta},
|
||||
{"",
|
||||
"--high-noise-cfg-scale",
|
||||
"(high noise) unconditional guidance scale: (default: 7.0)",
|
||||
¶ms.high_noise_sample_params.guidance.txt_cfg},
|
||||
{"",
|
||||
"--high-noise-img-cfg-scale",
|
||||
"(high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)",
|
||||
¶ms.high_noise_sample_params.guidance.img_cfg},
|
||||
{"",
|
||||
"--high-noise-guidance",
|
||||
"(high noise) distilled guidance scale for models with guidance input (default: 3.5)",
|
||||
¶ms.high_noise_sample_params.guidance.distilled_guidance},
|
||||
{"",
|
||||
"--high-noise-slg-scale",
|
||||
"(high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)",
|
||||
¶ms.high_noise_sample_params.guidance.slg.scale},
|
||||
{"",
|
||||
"--high-noise-skip-layer-start",
|
||||
"(high noise) SLG enabling point (default: 0.01)",
|
||||
¶ms.high_noise_sample_params.guidance.slg.layer_start},
|
||||
{"",
|
||||
"--high-noise-skip-layer-end",
|
||||
"(high noise) SLG disabling point (default: 0.2)",
|
||||
¶ms.high_noise_sample_params.guidance.slg.layer_end},
|
||||
{"",
|
||||
"--high-noise-eta",
|
||||
"(high noise) eta in DDIM, only for DDIM and TCD (default: 0)",
|
||||
¶ms.high_noise_sample_params.eta},
|
||||
{"",
|
||||
"--strength",
|
||||
"strength for noising/unnoising (default: 0.75)",
|
||||
¶ms.strength},
|
||||
{"",
|
||||
"--pm-style-strength",
|
||||
"",
|
||||
¶ms.pm_style_strength},
|
||||
{"",
|
||||
"--control-strength",
|
||||
"strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image",
|
||||
¶ms.control_strength},
|
||||
{"",
|
||||
"--moe-boundary",
|
||||
"timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1",
|
||||
¶ms.moe_boundary},
|
||||
{"",
|
||||
"--flow-shift",
|
||||
"shift value for Flow models like SD3.x or WAN (default: auto)",
|
||||
¶ms.flow_shift},
|
||||
{"",
|
||||
"--vace-strength",
|
||||
"wan vace strength",
|
||||
¶ms.vace_strength},
|
||||
{"",
|
||||
"--vae-tile-overlap",
|
||||
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
|
||||
¶ms.vae_tiling_params.target_overlap},
|
||||
};
|
||||
|
||||
options.bool_options = {
|
||||
{"", "--vae-tiling", "", true, ¶ms.vae_tiling_params.enabled},
|
||||
{"", "--force-sdxl-vae-conv-scale", "", true, ¶ms.force_sdxl_vae_conv_scale},
|
||||
{"", "--offload-to-cpu", "", true, ¶ms.offload_params_to_cpu},
|
||||
{"", "--control-net-cpu", "", true, ¶ms.control_net_cpu},
|
||||
{"", "--clip-on-cpu", "", true, ¶ms.clip_on_cpu},
|
||||
{"", "--vae-on-cpu", "", true, ¶ms.vae_on_cpu},
|
||||
{"", "--diffusion-fa", "", true, ¶ms.diffusion_flash_attn},
|
||||
{"", "--diffusion-conv-direct", "", true, ¶ms.diffusion_conv_direct},
|
||||
{"", "--vae-conv-direct", "", true, ¶ms.vae_conv_direct},
|
||||
{"", "--canny", "", true, ¶ms.canny_preprocess},
|
||||
{"-v", "--verbose", "", true, ¶ms.verbose},
|
||||
{"", "--color", "", true, ¶ms.color},
|
||||
{"", "--chroma-disable-dit-mask", "", false, ¶ms.chroma_use_dit_mask},
|
||||
{"", "--chroma-enable-t5-mask", "", true, ¶ms.chroma_use_t5_mask},
|
||||
{"", "--increase-ref-index", "", true, ¶ms.increase_ref_index},
|
||||
{"", "--disable-auto-resize-ref-image", "", false, ¶ms.auto_resize_ref_image},
|
||||
{"",
|
||||
"--vae-tiling",
|
||||
"process vae in tiles to reduce memory usage",
|
||||
true, ¶ms.vae_tiling_params.enabled},
|
||||
{"",
|
||||
"--force-sdxl-vae-conv-scale",
|
||||
"force use of conv scale on sdxl vae",
|
||||
true, ¶ms.force_sdxl_vae_conv_scale},
|
||||
{"",
|
||||
"--offload-to-cpu",
|
||||
"place the weights in RAM to save VRAM, and automatically load them into VRAM when needed",
|
||||
true, ¶ms.offload_params_to_cpu},
|
||||
{"",
|
||||
"--control-net-cpu",
|
||||
"keep controlnet in cpu (for low vram)",
|
||||
true, ¶ms.control_net_cpu},
|
||||
{"",
|
||||
"--clip-on-cpu",
|
||||
"keep clip in cpu (for low vram)",
|
||||
true, ¶ms.clip_on_cpu},
|
||||
{"",
|
||||
"--vae-on-cpu",
|
||||
"keep vae in cpu (for low vram)",
|
||||
true, ¶ms.vae_on_cpu},
|
||||
{"",
|
||||
"--diffusion-fa",
|
||||
"use flash attention in the diffusion model",
|
||||
true, ¶ms.diffusion_flash_attn},
|
||||
{"",
|
||||
"--diffusion-conv-direct",
|
||||
"use ggml_conv2d_direct in the diffusion model",
|
||||
true, ¶ms.diffusion_conv_direct},
|
||||
{"",
|
||||
"--vae-conv-direct",
|
||||
"use ggml_conv2d_direct in the vae model",
|
||||
true, ¶ms.vae_conv_direct},
|
||||
{"",
|
||||
"--canny",
|
||||
"apply canny preprocessor (edge detection)",
|
||||
true, ¶ms.canny_preprocess},
|
||||
{"-v",
|
||||
"--verbose",
|
||||
"print extra info",
|
||||
true, ¶ms.verbose},
|
||||
{"",
|
||||
"--color",
|
||||
"colors the logging tags according to level",
|
||||
true, ¶ms.color},
|
||||
{"",
|
||||
"--chroma-disable-dit-mask",
|
||||
"disable dit mask for chroma",
|
||||
false, ¶ms.chroma_use_dit_mask},
|
||||
{"",
|
||||
"--chroma-enable-t5-mask",
|
||||
"enable t5 mask for chroma",
|
||||
true, ¶ms.chroma_use_t5_mask},
|
||||
{"",
|
||||
"--increase-ref-index",
|
||||
"automatically increase the indices of references images based on the order they are listed (starting with 1).",
|
||||
true, ¶ms.increase_ref_index},
|
||||
{"",
|
||||
"--disable-auto-resize-ref-image",
|
||||
"disable auto resize of ref images",
|
||||
false, ¶ms.auto_resize_ref_image},
|
||||
};
|
||||
|
||||
auto on_mode_arg = [&](int argc, const char** argv, int index) {
|
||||
|
|
@ -590,7 +808,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||
return -1;
|
||||
}
|
||||
const char* mode = argv[index];
|
||||
if (mode != NULL) {
|
||||
if (mode != nullptr) {
|
||||
int mode_found = -1;
|
||||
for (int i = 0; i < MODE_COUNT; i++) {
|
||||
if (!strcmp(mode, modes_str[i])) {
|
||||
|
|
@ -715,7 +933,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||
};
|
||||
|
||||
auto on_help_arg = [&](int argc, const char** argv, int index) {
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(0);
|
||||
return 0;
|
||||
};
|
||||
|
|
@ -829,25 +1047,73 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||
};
|
||||
|
||||
options.manual_options = {
|
||||
{"-M", "--mode", "", on_mode_arg},
|
||||
{"", "--type", "", on_type_arg},
|
||||
{"", "--rng", "", on_rng_arg},
|
||||
{"-s", "--seed", "", on_seed_arg},
|
||||
{"", "--sampling-method", "", on_sample_method_arg},
|
||||
{"", "--prediction", "", on_prediction_arg},
|
||||
{"", "--scheduler", "", on_schedule_arg},
|
||||
{"", "--skip-layers", "", on_skip_layers_arg},
|
||||
{"", "--high-noise-sampling-method", "", on_high_noise_sample_method_arg},
|
||||
{"", "--high-noise-scheduler", "", on_high_noise_schedule_arg},
|
||||
{"", "--high-noise-skip-layers", "", on_high_noise_skip_layers_arg},
|
||||
{"-r", "--ref-image", "", on_ref_image_arg},
|
||||
{"-h", "--help", "", on_help_arg},
|
||||
{"", "--vae-tile-size", "", on_tile_size_arg},
|
||||
{"", "--vae-relative-tile-size", "", on_relative_tile_size_arg},
|
||||
{"-M",
|
||||
"--mode",
|
||||
"run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen",
|
||||
on_mode_arg},
|
||||
{"",
|
||||
"--type",
|
||||
"weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). "
|
||||
"If not specified, the default is the type of the weight file",
|
||||
on_type_arg},
|
||||
{"",
|
||||
"--rng",
|
||||
"RNG, one of [std_default, cuda], default: cuda",
|
||||
on_rng_arg},
|
||||
{"-s",
|
||||
"--seed",
|
||||
"RNG seed (default: 42, use random seed for < 0)",
|
||||
on_seed_arg},
|
||||
{"",
|
||||
"--sampling-method",
|
||||
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd] "
|
||||
"(default: euler for Flux/SD3/Wan, euler_a otherwise)",
|
||||
on_sample_method_arg},
|
||||
{"",
|
||||
"--prediction",
|
||||
"prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]",
|
||||
on_prediction_arg},
|
||||
{"",
|
||||
"--scheduler",
|
||||
"denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple], default: discrete",
|
||||
on_schedule_arg},
|
||||
{"",
|
||||
"--skip-layers",
|
||||
"layers to skip for SLG steps (default: [7,8,9])",
|
||||
on_skip_layers_arg},
|
||||
{"",
|
||||
"--high-noise-sampling-method",
|
||||
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd]"
|
||||
" default: euler for Flux/SD3/Wan, euler_a otherwise",
|
||||
on_high_noise_sample_method_arg},
|
||||
{"",
|
||||
"--high-noise-scheduler",
|
||||
"(high noise) denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple], default: discrete",
|
||||
on_high_noise_schedule_arg},
|
||||
{"",
|
||||
"--high-noise-skip-layers",
|
||||
"(high noise) layers to skip for SLG steps (default: [7,8,9])",
|
||||
on_high_noise_skip_layers_arg},
|
||||
{"-r",
|
||||
"--ref-image",
|
||||
"reference image for Flux Kontext models (can be used multiple times)",
|
||||
on_ref_image_arg},
|
||||
{"-h",
|
||||
"--help",
|
||||
"show this help message and exit",
|
||||
on_help_arg},
|
||||
{"",
|
||||
"--vae-tile-size",
|
||||
"tile size for vae tiling, format [X]x[Y] (default: 32x32)",
|
||||
on_tile_size_arg},
|
||||
{"",
|
||||
"--vae-relative-tile-size",
|
||||
"relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)",
|
||||
on_relative_tile_size_arg},
|
||||
};
|
||||
|
||||
if (!parse_options(argc, argv, options)) {
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
|
@ -857,19 +1123,19 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||
|
||||
if ((params.mode == IMG_GEN || params.mode == VID_GEN) && params.prompt.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: prompt\n");
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.mode != UPSCALE && params.model_path.length() == 0 && params.diffusion_model_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: model_path/diffusion_model\n");
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.output_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: output_path\n");
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
|
@ -933,7 +1199,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||
}
|
||||
|
||||
if (params.seed < 0) {
|
||||
srand((int)time(NULL));
|
||||
srand((int)time(nullptr));
|
||||
params.seed = rand();
|
||||
}
|
||||
|
||||
|
|
@ -1048,9 +1314,9 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
|
|||
uint8_t* load_image(const char* image_path, int& width, int& height, int expected_width = 0, int expected_height = 0, int expected_channel = 3) {
|
||||
int c = 0;
|
||||
uint8_t* image_buffer = (uint8_t*)stbi_load(image_path, &width, &height, &c, expected_channel);
|
||||
if (image_buffer == NULL) {
|
||||
if (image_buffer == nullptr) {
|
||||
fprintf(stderr, "load image from '%s' failed\n", image_path);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
if (c < expected_channel) {
|
||||
fprintf(stderr,
|
||||
|
|
@ -1060,17 +1326,17 @@ uint8_t* load_image(const char* image_path, int& width, int& height, int expecte
|
|||
c,
|
||||
image_path);
|
||||
free(image_buffer);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
if (width <= 0) {
|
||||
fprintf(stderr, "error: the width of image must be greater than 0, image_path = %s\n", image_path);
|
||||
free(image_buffer);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
if (height <= 0) {
|
||||
fprintf(stderr, "error: the height of image must be greater than 0, image_path = %s\n", image_path);
|
||||
free(image_buffer);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Resize input image ...
|
||||
|
|
@ -1092,10 +1358,10 @@ uint8_t* load_image(const char* image_path, int& width, int& height, int expecte
|
|||
if (crop_x != 0 || crop_y != 0) {
|
||||
printf("crop input image from %dx%d to %dx%d, image_path = %s\n", width, height, crop_w, crop_h, image_path);
|
||||
uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel);
|
||||
if (cropped_image_buffer == NULL) {
|
||||
if (cropped_image_buffer == nullptr) {
|
||||
fprintf(stderr, "error: allocate memory for crop\n");
|
||||
free(image_buffer);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
for (int row = 0; row < crop_h; row++) {
|
||||
uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel;
|
||||
|
|
@ -1114,10 +1380,10 @@ uint8_t* load_image(const char* image_path, int& width, int& height, int expecte
|
|||
int resized_width = expected_width;
|
||||
|
||||
uint8_t* resized_image_buffer = (uint8_t*)malloc(resized_height * resized_width * expected_channel);
|
||||
if (resized_image_buffer == NULL) {
|
||||
if (resized_image_buffer == nullptr) {
|
||||
fprintf(stderr, "error: allocate memory for resize input image\n");
|
||||
free(image_buffer);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
stbir_resize(image_buffer, width, height, 0,
|
||||
resized_image_buffer, resized_width, resized_height, 0, STBIR_TYPE_UINT8,
|
||||
|
|
@ -1168,7 +1434,7 @@ bool load_images_from_dir(const std::string dir,
|
|||
int width = 0;
|
||||
int height = 0;
|
||||
uint8_t* image_buffer = load_image(path.c_str(), width, height, expected_width, expected_height);
|
||||
if (image_buffer == NULL) {
|
||||
if (image_buffer == nullptr) {
|
||||
fprintf(stderr, "load image from '%s' failed\n", path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1220,10 +1486,10 @@ int main(int argc, const char* argv[]) {
|
|||
}
|
||||
|
||||
bool vae_decode_only = true;
|
||||
sd_image_t init_image = {(uint32_t)params.width, (uint32_t)params.height, 3, NULL};
|
||||
sd_image_t end_image = {(uint32_t)params.width, (uint32_t)params.height, 3, NULL};
|
||||
sd_image_t control_image = {(uint32_t)params.width, (uint32_t)params.height, 3, NULL};
|
||||
sd_image_t mask_image = {(uint32_t)params.width, (uint32_t)params.height, 1, NULL};
|
||||
sd_image_t init_image = {(uint32_t)params.width, (uint32_t)params.height, 3, nullptr};
|
||||
sd_image_t end_image = {(uint32_t)params.width, (uint32_t)params.height, 3, nullptr};
|
||||
sd_image_t control_image = {(uint32_t)params.width, (uint32_t)params.height, 3, nullptr};
|
||||
sd_image_t mask_image = {(uint32_t)params.width, (uint32_t)params.height, 1, nullptr};
|
||||
std::vector<sd_image_t> ref_images;
|
||||
std::vector<sd_image_t> pmid_images;
|
||||
std::vector<sd_image_t> control_frames;
|
||||
|
|
@ -1235,17 +1501,17 @@ int main(int argc, const char* argv[]) {
|
|||
free(mask_image.data);
|
||||
for (auto image : ref_images) {
|
||||
free(image.data);
|
||||
image.data = NULL;
|
||||
image.data = nullptr;
|
||||
}
|
||||
ref_images.clear();
|
||||
for (auto image : pmid_images) {
|
||||
free(image.data);
|
||||
image.data = NULL;
|
||||
image.data = nullptr;
|
||||
}
|
||||
pmid_images.clear();
|
||||
for (auto image : control_frames) {
|
||||
free(image.data);
|
||||
image.data = NULL;
|
||||
image.data = nullptr;
|
||||
}
|
||||
control_frames.clear();
|
||||
};
|
||||
|
|
@ -1256,7 +1522,7 @@ int main(int argc, const char* argv[]) {
|
|||
int width = 0;
|
||||
int height = 0;
|
||||
init_image.data = load_image(params.init_image_path.c_str(), width, height, params.width, params.height);
|
||||
if (init_image.data == NULL) {
|
||||
if (init_image.data == nullptr) {
|
||||
fprintf(stderr, "load image from '%s' failed\n", params.init_image_path.c_str());
|
||||
release_all_resources();
|
||||
return 1;
|
||||
|
|
@ -1269,7 +1535,7 @@ int main(int argc, const char* argv[]) {
|
|||
int width = 0;
|
||||
int height = 0;
|
||||
end_image.data = load_image(params.end_image_path.c_str(), width, height, params.width, params.height);
|
||||
if (end_image.data == NULL) {
|
||||
if (end_image.data == nullptr) {
|
||||
fprintf(stderr, "load image from '%s' failed\n", params.end_image_path.c_str());
|
||||
release_all_resources();
|
||||
return 1;
|
||||
|
|
@ -1281,7 +1547,7 @@ int main(int argc, const char* argv[]) {
|
|||
int width = 0;
|
||||
int height = 0;
|
||||
mask_image.data = load_image(params.mask_image_path.c_str(), width, height, params.width, params.height, 1);
|
||||
if (mask_image.data == NULL) {
|
||||
if (mask_image.data == nullptr) {
|
||||
fprintf(stderr, "load image from '%s' failed\n", params.mask_image_path.c_str());
|
||||
release_all_resources();
|
||||
return 1;
|
||||
|
|
@ -1289,7 +1555,7 @@ int main(int argc, const char* argv[]) {
|
|||
} else {
|
||||
mask_image.data = (uint8_t*)malloc(params.width * params.height);
|
||||
memset(mask_image.data, 255, params.width * params.height);
|
||||
if (mask_image.data == NULL) {
|
||||
if (mask_image.data == nullptr) {
|
||||
fprintf(stderr, "malloc mask image failed\n");
|
||||
release_all_resources();
|
||||
return 1;
|
||||
|
|
@ -1300,7 +1566,7 @@ int main(int argc, const char* argv[]) {
|
|||
int width = 0;
|
||||
int height = 0;
|
||||
control_image.data = load_image(params.control_image_path.c_str(), width, height, params.width, params.height);
|
||||
if (control_image.data == NULL) {
|
||||
if (control_image.data == nullptr) {
|
||||
fprintf(stderr, "load image from '%s' failed\n", params.control_image_path.c_str());
|
||||
release_all_resources();
|
||||
return 1;
|
||||
|
|
@ -1321,7 +1587,7 @@ int main(int argc, const char* argv[]) {
|
|||
int width = 0;
|
||||
int height = 0;
|
||||
uint8_t* image_buffer = load_image(path.c_str(), width, height);
|
||||
if (image_buffer == NULL) {
|
||||
if (image_buffer == nullptr) {
|
||||
fprintf(stderr, "load image from '%s' failed\n", path.c_str());
|
||||
release_all_resources();
|
||||
return 1;
|
||||
|
|
@ -1403,18 +1669,18 @@ int main(int argc, const char* argv[]) {
|
|||
if (params.mode == UPSCALE) {
|
||||
num_results = 1;
|
||||
results = (sd_image_t*)calloc(num_results, sizeof(sd_image_t));
|
||||
if (results == NULL) {
|
||||
if (results == nullptr) {
|
||||
printf("failed to allocate results array\n");
|
||||
release_all_resources();
|
||||
return 1;
|
||||
}
|
||||
|
||||
results[0] = init_image;
|
||||
init_image.data = NULL;
|
||||
init_image.data = nullptr;
|
||||
} else {
|
||||
sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params);
|
||||
|
||||
if (sd_ctx == NULL) {
|
||||
if (sd_ctx == nullptr) {
|
||||
printf("new_sd_ctx_t failed\n");
|
||||
release_all_resources();
|
||||
return 1;
|
||||
|
|
@ -1477,7 +1743,7 @@ int main(int argc, const char* argv[]) {
|
|||
results = generate_video(sd_ctx, &vid_gen_params, &num_results);
|
||||
}
|
||||
|
||||
if (results == NULL) {
|
||||
if (results == nullptr) {
|
||||
printf("generate failed\n");
|
||||
free_sd_ctx(sd_ctx);
|
||||
return 1;
|
||||
|
|
@ -1493,17 +1759,17 @@ int main(int argc, const char* argv[]) {
|
|||
params.diffusion_conv_direct,
|
||||
params.n_threads);
|
||||
|
||||
if (upscaler_ctx == NULL) {
|
||||
if (upscaler_ctx == nullptr) {
|
||||
printf("new_upscaler_ctx failed\n");
|
||||
} else {
|
||||
for (int i = 0; i < num_results; i++) {
|
||||
if (results[i].data == NULL) {
|
||||
if (results[i].data == nullptr) {
|
||||
continue;
|
||||
}
|
||||
sd_image_t current_image = results[i];
|
||||
for (int u = 0; u < params.upscale_repeats; ++u) {
|
||||
sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor);
|
||||
if (upscaled_image.data == NULL) {
|
||||
if (upscaled_image.data == nullptr) {
|
||||
printf("upscale failed\n");
|
||||
break;
|
||||
}
|
||||
|
|
@ -1561,7 +1827,7 @@ int main(int argc, const char* argv[]) {
|
|||
file_ext = ".png";
|
||||
}
|
||||
for (int i = 0; i < num_results; i++) {
|
||||
if (results[i].data == NULL) {
|
||||
if (results[i].data == nullptr) {
|
||||
continue;
|
||||
}
|
||||
std::string final_image_path = i > 0 ? base_path + "_" + std::to_string(i + 1) + file_ext : base_path + file_ext;
|
||||
|
|
@ -1579,7 +1845,7 @@ int main(int argc, const char* argv[]) {
|
|||
|
||||
for (int i = 0; i < num_results; i++) {
|
||||
free(results[i].data);
|
||||
results[i].data = NULL;
|
||||
results[i].data = nullptr;
|
||||
}
|
||||
free(results);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
#ifndef __MMDIT_HPP__
|
||||
#define __MMDIT_HPP__
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ggml_extend.hpp"
|
||||
#include "model.h"
|
||||
|
||||
|
|
@ -208,8 +210,8 @@ public:
|
|||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x) {
|
||||
auto qkv = pre_attention(ctx, x);
|
||||
x = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, true); // [N, n_token, dim]
|
||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, false, true); // [N, n_token, dim]
|
||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
|
@ -347,7 +349,7 @@ public:
|
|||
auto attn_in = modulate(ctx, norm1->forward(ctx, x), shift_msa, scale_msa);
|
||||
auto qkv = attn->pre_attention(ctx, attn_in);
|
||||
|
||||
return {qkv, {NULL, NULL, NULL, NULL, NULL}};
|
||||
return {qkv, {nullptr, nullptr, nullptr, nullptr, nullptr}};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -439,8 +441,8 @@ public:
|
|||
auto qkv2 = std::get<1>(qkv_intermediates);
|
||||
auto intermediates = std::get<2>(qkv_intermediates);
|
||||
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
auto attn2_out = ggml_nn_attention_ext(ctx, backend, qkv2[0], qkv2[1], qkv2[2], num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, false, flash_attn); // [N, n_token, dim]
|
||||
auto attn2_out = ggml_nn_attention_ext(ctx, backend, qkv2[0], qkv2[1], qkv2[2], num_heads, nullptr, false, false, flash_attn); // [N, n_token, dim]
|
||||
x = post_attention_x(ctx,
|
||||
attn_out,
|
||||
attn2_out,
|
||||
|
|
@ -456,7 +458,7 @@ public:
|
|||
auto qkv = qkv_intermediates.first;
|
||||
auto intermediates = qkv_intermediates.second;
|
||||
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, false, flash_attn); // [N, n_token, dim]
|
||||
x = post_attention(ctx,
|
||||
attn_out,
|
||||
intermediates[0],
|
||||
|
|
@ -502,8 +504,8 @@ block_mixing(struct ggml_context* ctx,
|
|||
qkv.push_back(ggml_concat(ctx, context_qkv[i], x_qkv[i], 1));
|
||||
}
|
||||
|
||||
auto attn = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], x_block->num_heads, NULL, false, false, flash_attn); // [N, n_context + n_token, hidden_size]
|
||||
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_context + n_token, N, hidden_size]
|
||||
auto attn = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], x_block->num_heads, nullptr, false, false, flash_attn); // [N, n_context + n_token, hidden_size]
|
||||
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_context + n_token, N, hidden_size]
|
||||
auto context_attn = ggml_view_3d(ctx,
|
||||
attn,
|
||||
attn->ne[0],
|
||||
|
|
@ -532,7 +534,7 @@ block_mixing(struct ggml_context* ctx,
|
|||
context_intermediates[3],
|
||||
context_intermediates[4]);
|
||||
} else {
|
||||
context = NULL;
|
||||
context = nullptr;
|
||||
}
|
||||
|
||||
if (x_block->self_attn) {
|
||||
|
|
@ -645,7 +647,7 @@ protected:
|
|||
std::string qk_norm;
|
||||
bool flash_attn = false;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") override {
|
||||
enum ggml_type wtype = GGML_TYPE_F32;
|
||||
params["pos_embed"] = ggml_new_tensor_3d(ctx, wtype, hidden_size, num_patchs, 1);
|
||||
}
|
||||
|
|
@ -823,8 +825,8 @@ public:
|
|||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* t,
|
||||
struct ggml_tensor* y = NULL,
|
||||
struct ggml_tensor* context = NULL,
|
||||
struct ggml_tensor* y = nullptr,
|
||||
struct ggml_tensor* context = nullptr,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
// Forward pass of DiT.
|
||||
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
|
||||
|
|
@ -843,14 +845,14 @@ public:
|
|||
x = ggml_add(ctx, patch_embed, pos_embed); // [N, H*W, hidden_size]
|
||||
|
||||
auto c = t_embedder->forward(ctx, t); // [N, hidden_size]
|
||||
if (y != NULL && adm_in_channels != -1) {
|
||||
if (y != nullptr && adm_in_channels != -1) {
|
||||
auto y_embedder = std::dynamic_pointer_cast<VectorEmbedder>(blocks["y_embedder"]);
|
||||
|
||||
y = y_embedder->forward(ctx, y); // [N, hidden_size]
|
||||
c = ggml_add(ctx, c, y);
|
||||
}
|
||||
|
||||
if (context != NULL) {
|
||||
if (context != nullptr) {
|
||||
auto context_embedder = std::dynamic_pointer_cast<Linear>(blocks["context_embedder"]);
|
||||
|
||||
context = context_embedder->forward(ctx, context); // [N, L, D] aka [N, L, 1536]
|
||||
|
|
@ -875,7 +877,7 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
mmdit.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "mmdit";
|
||||
}
|
||||
|
||||
|
|
@ -913,8 +915,8 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* y,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL,
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr,
|
||||
std::vector<int> skip_layers = std::vector<int>()) {
|
||||
// x: [N, in_channels, h, w]
|
||||
// timesteps: [N, ]
|
||||
|
|
@ -930,11 +932,11 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
{
|
||||
// cpu f16: pass
|
||||
|
|
@ -955,7 +957,7 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
ggml_set_f32(y, 0.01f);
|
||||
// print_ggml_tensor(y);
|
||||
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, timesteps, context, y, &out, work_ctx);
|
||||
|
|
@ -970,7 +972,7 @@ struct MMDiTRunner : public GGMLRunner {
|
|||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<MMDiTRunner> mmdit = std::shared_ptr<MMDiTRunner>(new MMDiTRunner(backend, false, false));
|
||||
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, false, false);
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include <stdarg.h>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdarg>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
|
@ -899,7 +899,6 @@ uint16_t f8_e5m2_to_f16(uint8_t fp8) {
|
|||
}
|
||||
|
||||
if (exponent == 0) { // subnormal numbers
|
||||
fp16_exponent = 0;
|
||||
fp16_mantissa = (mantissa << 8);
|
||||
return fp16_sign | fp16_mantissa;
|
||||
}
|
||||
|
|
@ -978,7 +977,7 @@ void convert_tensor(void* src,
|
|||
ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n);
|
||||
} else {
|
||||
auto qtype = ggml_get_type_traits(src_type);
|
||||
if (qtype->to_float == NULL) {
|
||||
if (qtype->to_float == nullptr) {
|
||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
|
||||
ggml_type_name(src_type)));
|
||||
}
|
||||
|
|
@ -988,7 +987,7 @@ void convert_tensor(void* src,
|
|||
// src_type == GGML_TYPE_F16 => dst_type is quantized
|
||||
// src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized
|
||||
auto qtype = ggml_get_type_traits(src_type);
|
||||
if (qtype->to_float == NULL) {
|
||||
if (qtype->to_float == nullptr) {
|
||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
|
||||
ggml_type_name(src_type)));
|
||||
}
|
||||
|
|
@ -1050,7 +1049,7 @@ std::map<char, int> unicode_to_byte() {
|
|||
|
||||
bool is_zip_file(const std::string& file_path) {
|
||||
struct zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
|
||||
if (zip == NULL) {
|
||||
if (zip == nullptr) {
|
||||
return false;
|
||||
}
|
||||
zip_close(zip);
|
||||
|
|
@ -1157,8 +1156,8 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
|
|||
file_paths_.push_back(file_path);
|
||||
size_t file_index = file_paths_.size() - 1;
|
||||
|
||||
gguf_context* ctx_gguf_ = NULL;
|
||||
ggml_context* ctx_meta_ = NULL;
|
||||
gguf_context* ctx_gguf_ = nullptr;
|
||||
ggml_context* ctx_meta_ = nullptr;
|
||||
|
||||
ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
|
||||
if (!ctx_gguf_) {
|
||||
|
|
@ -1774,7 +1773,7 @@ bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::s
|
|||
size_t file_index = file_paths_.size() - 1;
|
||||
|
||||
struct zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
|
||||
if (zip == NULL) {
|
||||
if (zip == nullptr) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1787,7 +1786,7 @@ bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::s
|
|||
if (pos != std::string::npos) {
|
||||
std::string dir = name.substr(0, pos);
|
||||
printf("ZIP %d, name = %s, dir = %s \n", i, name.c_str(), dir.c_str());
|
||||
void* pkl_data = NULL;
|
||||
void* pkl_data = nullptr;
|
||||
size_t pkl_size;
|
||||
zip_entry_read(zip, &pkl_data, &pkl_size);
|
||||
|
||||
|
|
@ -2218,10 +2217,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
for (int i = 0; i < n_threads; ++i) {
|
||||
workers.emplace_back([&, file_path, is_zip]() {
|
||||
std::ifstream file;
|
||||
struct zip_t* zip = NULL;
|
||||
struct zip_t* zip = nullptr;
|
||||
if (is_zip) {
|
||||
zip = zip_open(file_path.c_str(), 0, 'r');
|
||||
if (zip == NULL) {
|
||||
if (zip == nullptr) {
|
||||
LOG_ERROR("failed to open zip '%s'", file_path.c_str());
|
||||
failed = true;
|
||||
return;
|
||||
|
|
@ -2252,7 +2251,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
}
|
||||
|
||||
const TensorStorage& tensor_storage = *file_tensors[idx];
|
||||
ggml_tensor* dst_tensor = NULL;
|
||||
ggml_tensor* dst_tensor = nullptr;
|
||||
|
||||
t0 = ggml_time_ms();
|
||||
|
||||
|
|
@ -2262,7 +2261,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
break;
|
||||
}
|
||||
|
||||
if (dst_tensor == NULL) {
|
||||
if (dst_tensor == nullptr) {
|
||||
t1 = ggml_time_ms();
|
||||
read_time_ms.fetch_add(t1 - t0);
|
||||
continue;
|
||||
|
|
@ -2271,7 +2270,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
size_t nbytes_to_read = tensor_storage.nbytes_to_read();
|
||||
|
||||
auto read_data = [&](char* buf, size_t n) {
|
||||
if (zip != NULL) {
|
||||
if (zip != nullptr) {
|
||||
zip_entry_openbyindex(zip, tensor_storage.index_in_zip);
|
||||
size_t entry_size = zip_entry_size(zip);
|
||||
if (entry_size != n) {
|
||||
|
|
@ -2295,7 +2294,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
}
|
||||
};
|
||||
|
||||
if (dst_tensor->buffer == NULL || ggml_backend_buffer_is_host(dst_tensor->buffer)) {
|
||||
if (dst_tensor->buffer == nullptr || ggml_backend_buffer_is_host(dst_tensor->buffer)) {
|
||||
if (tensor_storage.type == dst_tensor->type) {
|
||||
GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
|
||||
if (tensor_storage.is_f64 || tensor_storage.is_i64) {
|
||||
|
|
@ -2397,7 +2396,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
}
|
||||
}
|
||||
}
|
||||
if (zip != NULL) {
|
||||
if (zip != nullptr) {
|
||||
zip_close(zip);
|
||||
}
|
||||
});
|
||||
|
|
@ -2587,7 +2586,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
|
|||
mem_size += tensor_storages.size() * ggml_tensor_overhead();
|
||||
mem_size += get_params_mem_size(backend, type);
|
||||
LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
|
||||
ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false});
|
||||
ggml_context* ggml_ctx = ggml_init({mem_size, nullptr, false});
|
||||
|
||||
gguf_context* gguf_ctx = gguf_init_empty();
|
||||
|
||||
|
|
@ -2613,7 +2612,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
|
|||
|
||||
std::lock_guard<std::mutex> lock(tensor_mutex);
|
||||
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
|
||||
if (tensor == NULL) {
|
||||
if (tensor == nullptr) {
|
||||
LOG_ERROR("ggml_new_tensor failed");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -2646,7 +2645,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
|
|||
|
||||
int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) {
|
||||
size_t alignment = 128;
|
||||
if (backend != NULL) {
|
||||
if (backend != nullptr) {
|
||||
alignment = ggml_backend_get_alignment(backend);
|
||||
}
|
||||
int64_t mem_size = 0;
|
||||
|
|
@ -2676,7 +2675,7 @@ bool convert(const char* input_path, const char* vae_path, const char* output_pa
|
|||
return false;
|
||||
}
|
||||
|
||||
if (vae_path != NULL && strlen(vae_path) > 0) {
|
||||
if (vae_path != nullptr && strlen(vae_path) > 0) {
|
||||
if (!model_loader.init_from_file(vae_path, "vae.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", vae_path);
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ggml-backend.h"
|
||||
|
|
@ -140,8 +141,8 @@ struct TensorStorage {
|
|||
|
||||
TensorStorage() = default;
|
||||
|
||||
TensorStorage(const std::string& name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
|
||||
: name(name), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
|
||||
TensorStorage(std::string name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
|
||||
: name(std::move(name)), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
|
||||
for (int i = 0; i < n_dims; i++) {
|
||||
this->ne[i] = ne[i];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -472,8 +472,8 @@ public:
|
|||
struct ggml_tensor* prompt_embeds_d = to_backend(prompt_embeds);
|
||||
struct ggml_tensor* id_embeds_d = to_backend(id_embeds);
|
||||
|
||||
struct ggml_tensor* left = NULL;
|
||||
struct ggml_tensor* right = NULL;
|
||||
struct ggml_tensor* left = nullptr;
|
||||
struct ggml_tensor* right = nullptr;
|
||||
for (int i = 0; i < class_tokens_mask.size(); i++) {
|
||||
if (class_tokens_mask[i]) {
|
||||
// printf(" 1,");
|
||||
|
|
@ -528,7 +528,7 @@ public:
|
|||
}
|
||||
}
|
||||
}
|
||||
struct ggml_tensor* updated_prompt_embeds = NULL;
|
||||
struct ggml_tensor* updated_prompt_embeds = nullptr;
|
||||
if (pm_version == PM_VERSION_1)
|
||||
updated_prompt_embeds = id_encoder.forward(ctx0,
|
||||
runtime_backend,
|
||||
|
|
@ -638,7 +638,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||
pos = tensors.find("pmid.id_embeds");
|
||||
if (pos != tensors.end())
|
||||
return pos->second;
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = 80 * input->ne[0] * input->ne[1]; // 20M for 512x512
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* ctx0 = ggml_init(params);
|
||||
struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
|
||||
|
|
@ -165,7 +165,7 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo
|
|||
bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(40 * img.width * img.height); // 10MB for 512x512
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
#ifndef __QWEN_IMAGE_HPP__
|
||||
#define __QWEN_IMAGE_HPP__
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common.hpp"
|
||||
#include "flux.hpp"
|
||||
#include "ggml_extend.hpp"
|
||||
|
|
@ -534,12 +536,12 @@ namespace Qwen {
|
|||
continue;
|
||||
}
|
||||
}
|
||||
LOG_ERROR("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
|
||||
LOG_INFO("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
|
||||
qwen_image = QwenImageModel(qwen_image_params);
|
||||
qwen_image.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "qwen_image";
|
||||
}
|
||||
|
||||
|
|
@ -577,7 +579,7 @@ namespace Qwen {
|
|||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, qwen_image_params.axes_dim_sum / 2, pos_len);
|
||||
// pe->data = pe_vec.data();
|
||||
// print_ggml_tensor(pe, true, "pe");
|
||||
// pe->data = NULL;
|
||||
// pe->data = nullptr;
|
||||
set_backend_tensor_data(pe, pe_vec.data());
|
||||
|
||||
struct ggml_tensor* out = qwen_image.forward(compute_ctx,
|
||||
|
|
@ -599,8 +601,8 @@ namespace Qwen {
|
|||
struct ggml_tensor* context,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
bool increase_ref_index = false,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) {
|
||||
// x: [N, in_channels, h, w]
|
||||
// timesteps: [N, ]
|
||||
// context: [N, max_position, hidden_size]
|
||||
|
|
@ -614,11 +616,11 @@ namespace Qwen {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1GB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
{
|
||||
// auto x = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 16, 16, 16, 1);
|
||||
|
|
@ -634,7 +636,7 @@ namespace Qwen {
|
|||
auto context = load_tensor_from_file(work_ctx, "./qwen_image_context.bin");
|
||||
print_ggml_tensor(context);
|
||||
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, timesteps, context, {}, false, &out, work_ctx);
|
||||
|
|
@ -666,12 +668,12 @@ namespace Qwen {
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<QwenImageRunner> qwen_image = std::shared_ptr<QwenImageRunner>(new QwenImageRunner(backend,
|
||||
false,
|
||||
tensor_types,
|
||||
"model.diffusion_model",
|
||||
VERSION_QWEN_IMAGE,
|
||||
true));
|
||||
std::shared_ptr<QwenImageRunner> qwen_image = std::make_shared<QwenImageRunner>(backend,
|
||||
false,
|
||||
tensor_types,
|
||||
"model.diffusion_model",
|
||||
VERSION_QWEN_IMAGE,
|
||||
true);
|
||||
|
||||
qwen_image->alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
|
|
|
|||
|
|
@ -5,11 +5,13 @@
|
|||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <regex>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "clip.hpp"
|
||||
|
|
@ -589,7 +591,7 @@ namespace Qwen {
|
|||
int64_t window_size,
|
||||
std::set<int> fullatt_block_indexes = {7, 15, 23, 31},
|
||||
float eps = 1e-6f)
|
||||
: num_layers(num_layers), fullatt_block_indexes(fullatt_block_indexes), spatial_merge_size(spatial_merge_size) {
|
||||
: num_layers(num_layers), fullatt_block_indexes(std::move(fullatt_block_indexes)), spatial_merge_size(spatial_merge_size) {
|
||||
blocks["patch_embed"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VisionPatchEmbed(llama_cpp_style,
|
||||
patch_size,
|
||||
temporal_patch_size,
|
||||
|
|
@ -949,7 +951,7 @@ namespace Qwen {
|
|||
model.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "qwenvl2.5";
|
||||
}
|
||||
|
||||
|
|
@ -1011,7 +1013,7 @@ namespace Qwen {
|
|||
struct ggml_tensor* input_ids,
|
||||
std::vector<std::pair<int, ggml_tensor*>> image_embeds,
|
||||
ggml_tensor** output,
|
||||
ggml_context* output_ctx = NULL) {
|
||||
ggml_context* output_ctx = nullptr) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(input_ids, image_embeds);
|
||||
};
|
||||
|
|
@ -1162,7 +1164,7 @@ namespace Qwen {
|
|||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, head_dim / 2, pos_len);
|
||||
// pe->data = pe_vec.data();
|
||||
// print_ggml_tensor(pe);
|
||||
// pe->data = NULL;
|
||||
// pe->data = nullptr;
|
||||
set_backend_tensor_data(pe, pe_vec.data());
|
||||
|
||||
struct ggml_tensor* hidden_states = vision_forward(compute_ctx,
|
||||
|
|
@ -1180,7 +1182,7 @@ namespace Qwen {
|
|||
void encode_image(const int n_threads,
|
||||
struct ggml_tensor* image,
|
||||
ggml_tensor** output,
|
||||
ggml_context* output_ctx = NULL) {
|
||||
ggml_context* output_ctx = nullptr) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_encode_image_graph(image);
|
||||
};
|
||||
|
|
@ -1246,11 +1248,11 @@ namespace Qwen {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1GB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
bool test_vit = true;
|
||||
bool test_decoder_with_vit = true;
|
||||
|
||||
|
|
@ -1259,7 +1261,7 @@ namespace Qwen {
|
|||
{
|
||||
auto image = load_tensor_from_file(work_ctx, "qwen2vl_normalized.bin");
|
||||
print_ggml_tensor(image, false, "image");
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
model.encode_image(8, image, &out, work_ctx);
|
||||
|
|
@ -1295,7 +1297,7 @@ namespace Qwen {
|
|||
}
|
||||
printf("\n");
|
||||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
model.compute(8, input_ids, image_embeds, &out, work_ctx);
|
||||
|
|
@ -1308,7 +1310,7 @@ namespace Qwen {
|
|||
// ggml_set_f32(image, 0.f);
|
||||
auto image = load_tensor_from_file(work_ctx, "qwen2vl_normalized.bin");
|
||||
print_ggml_tensor(image, false, "image");
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
model.encode_image(8, image, &out, work_ctx);
|
||||
|
|
@ -1330,7 +1332,7 @@ namespace Qwen {
|
|||
}
|
||||
printf("\n");
|
||||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
model.compute(8, input_ids, {}, &out, work_ctx);
|
||||
|
|
@ -1361,11 +1363,11 @@ namespace Qwen {
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Qwen2_5_VLEmbedder> qwenvl = std::shared_ptr<Qwen2_5_VLEmbedder>(new Qwen2_5_VLEmbedder(backend,
|
||||
false,
|
||||
tensor_types,
|
||||
"qwen2vl",
|
||||
true));
|
||||
std::shared_ptr<Qwen2_5_VLEmbedder> qwenvl = std::make_shared<Qwen2_5_VLEmbedder>(backend,
|
||||
false,
|
||||
tensor_types,
|
||||
"qwen2vl",
|
||||
true);
|
||||
|
||||
qwenvl->alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
|
|
|
|||
|
|
@ -15,11 +15,11 @@ private:
|
|||
std::default_random_engine generator;
|
||||
|
||||
public:
|
||||
void manual_seed(uint64_t seed) {
|
||||
void manual_seed(uint64_t seed) override {
|
||||
generator.seed((unsigned int)seed);
|
||||
}
|
||||
|
||||
std::vector<float> randn(uint32_t n) {
|
||||
std::vector<float> randn(uint32_t n) override {
|
||||
std::vector<float> result;
|
||||
float mean = 0.0;
|
||||
float stddev = 1.0;
|
||||
|
|
|
|||
|
|
@ -93,12 +93,12 @@ public:
|
|||
this->offset = 0;
|
||||
}
|
||||
|
||||
void manual_seed(uint64_t seed) {
|
||||
void manual_seed(uint64_t seed) override {
|
||||
this->seed = seed;
|
||||
this->offset = 0;
|
||||
}
|
||||
|
||||
std::vector<float> randn(uint32_t n) {
|
||||
std::vector<float> randn(uint32_t n) override {
|
||||
std::vector<std::vector<uint32_t>> counter(4, std::vector<uint32_t>(n, 0));
|
||||
for (uint32_t i = 0; i < n; i++) {
|
||||
counter[0][i] = this->offset;
|
||||
|
|
|
|||
|
|
@ -87,10 +87,10 @@ void calculate_alphas_cumprod(float* alphas_cumprod,
|
|||
|
||||
class StableDiffusionGGML {
|
||||
public:
|
||||
ggml_backend_t backend = NULL; // general backend
|
||||
ggml_backend_t clip_backend = NULL;
|
||||
ggml_backend_t control_net_backend = NULL;
|
||||
ggml_backend_t vae_backend = NULL;
|
||||
ggml_backend_t backend = nullptr; // general backend
|
||||
ggml_backend_t clip_backend = nullptr;
|
||||
ggml_backend_t control_net_backend = nullptr;
|
||||
ggml_backend_t vae_backend = nullptr;
|
||||
|
||||
SDVersion version;
|
||||
bool vae_decode_only = false;
|
||||
|
|
@ -99,6 +99,7 @@ public:
|
|||
std::shared_ptr<RNG> rng = std::make_shared<STDDefaultRNG>();
|
||||
int n_threads = -1;
|
||||
float scale_factor = 0.18215f;
|
||||
float shift_factor = 0.f;
|
||||
|
||||
std::shared_ptr<Conditioner> cond_stage_model;
|
||||
std::shared_ptr<FrozenCLIPVisionEmbedder> clip_vision; // for svd or wan2.1 i2v
|
||||
|
|
@ -106,7 +107,7 @@ public:
|
|||
std::shared_ptr<DiffusionModel> high_noise_diffusion_model;
|
||||
std::shared_ptr<VAE> first_stage_model;
|
||||
std::shared_ptr<TinyAutoEncoder> tae_first_stage;
|
||||
std::shared_ptr<ControlNet> control_net = NULL;
|
||||
std::shared_ptr<ControlNet> control_net = nullptr;
|
||||
std::shared_ptr<PhotoMakerIDEncoder> pmid_model;
|
||||
std::shared_ptr<LoraModel> pmid_lora;
|
||||
std::shared_ptr<PhotoMakerIDEmbed> pmid_id_embeds;
|
||||
|
|
@ -448,9 +449,10 @@ public:
|
|||
scale_factor = 0.13025f;
|
||||
} else if (sd_version_is_sd3(version)) {
|
||||
scale_factor = 1.5305f;
|
||||
shift_factor = 0.0609f;
|
||||
} else if (sd_version_is_flux(version)) {
|
||||
scale_factor = 0.3611f;
|
||||
// TODO: shift_factor
|
||||
shift_factor = 0.1159f;
|
||||
} else if (sd_version_is_wan(version) || sd_version_is_qwen_image(version)) {
|
||||
scale_factor = 1.0f;
|
||||
}
|
||||
|
|
@ -652,7 +654,7 @@ public:
|
|||
// first_stage_model->get_param_tensors(tensors, "first_stage_model.");
|
||||
|
||||
if (strlen(SAFE_STR(sd_ctx_params->control_net_path)) > 0) {
|
||||
ggml_backend_t controlnet_backend = NULL;
|
||||
ggml_backend_t controlnet_backend = nullptr;
|
||||
if (sd_ctx_params->keep_control_net_on_cpu && !ggml_backend_is_cpu(backend)) {
|
||||
LOG_DEBUG("ControlNet: Using CPU backend");
|
||||
controlnet_backend = ggml_backend_cpu_init();
|
||||
|
|
@ -712,11 +714,11 @@ public:
|
|||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024) * 1024; // 10M
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
// LOG_DEBUG("mem_size %u ", params.mem_size);
|
||||
struct ggml_context* ctx = ggml_init(params); // for alphas_cumprod and is_using_v_parameterization check
|
||||
GGML_ASSERT(ctx != NULL);
|
||||
GGML_ASSERT(ctx != nullptr);
|
||||
ggml_tensor* alphas_cumprod_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, TIMESTEPS);
|
||||
calculate_alphas_cumprod((float*)alphas_cumprod_tensor->data);
|
||||
|
||||
|
|
@ -887,11 +889,14 @@ public:
|
|||
denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
|
||||
} else if (sd_version_is_flux(version)) {
|
||||
LOG_INFO("running in Flux FLOW mode");
|
||||
float shift = 1.0f; // TODO: validate
|
||||
for (auto pair : model_loader.tensor_storages_types) {
|
||||
if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
|
||||
shift = 1.15f;
|
||||
break;
|
||||
float shift = sd_ctx_params->flow_shift;
|
||||
if (shift == INFINITY) {
|
||||
shift = 1.0f; // TODO: validate
|
||||
for (auto pair : model_loader.tensor_storages_types) {
|
||||
if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
|
||||
shift = 1.15f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
denoiser = std::make_shared<FluxFlowDenoiser>(shift);
|
||||
|
|
@ -991,8 +996,8 @@ public:
|
|||
struct ggml_tensor* timesteps = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, 1);
|
||||
ggml_set_f32(timesteps, 999);
|
||||
|
||||
struct ggml_tensor* concat = is_inpaint ? ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 8, 8, 5, 1) : NULL;
|
||||
if (concat != NULL) {
|
||||
struct ggml_tensor* concat = is_inpaint ? ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 8, 8, 5, 1) : nullptr;
|
||||
if (concat != nullptr) {
|
||||
ggml_set_f32(concat, 0);
|
||||
}
|
||||
|
||||
|
|
@ -1139,7 +1144,7 @@ public:
|
|||
ggml_tensor* prompts_embeds,
|
||||
ggml_tensor* id_embeds,
|
||||
std::vector<bool>& class_tokens_mask) {
|
||||
ggml_tensor* res = NULL;
|
||||
ggml_tensor* res = nullptr;
|
||||
pmid_model->compute(n_threads, init_img, prompts_embeds, id_embeds, class_tokens_mask, &res, work_ctx);
|
||||
return res;
|
||||
}
|
||||
|
|
@ -1149,7 +1154,7 @@ public:
|
|||
bool return_pooled = true,
|
||||
int clip_skip = -1,
|
||||
bool zero_out_masked = false) {
|
||||
ggml_tensor* output = NULL;
|
||||
ggml_tensor* output = nullptr;
|
||||
if (zero_out_masked) {
|
||||
if (return_pooled) {
|
||||
output = ggml_new_tensor_1d(work_ctx,
|
||||
|
|
@ -1167,12 +1172,12 @@ public:
|
|||
sd_image_f32_t image = sd_image_t_to_sd_image_f32_t(init_image);
|
||||
sd_image_f32_t resized_image = clip_preprocess(image, clip_vision->vision_model.image_size, clip_vision->vision_model.image_size);
|
||||
free(image.data);
|
||||
image.data = NULL;
|
||||
image.data = nullptr;
|
||||
|
||||
ggml_tensor* pixel_values = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, resized_image.width, resized_image.height, 3, 1);
|
||||
sd_image_f32_to_tensor(resized_image, pixel_values, false);
|
||||
free(resized_image.data);
|
||||
resized_image.data = NULL;
|
||||
resized_image.data = nullptr;
|
||||
|
||||
// print_ggml_tensor(pixel_values);
|
||||
clip_vision->compute(n_threads, pixel_values, return_pooled, clip_skip, &output, work_ctx);
|
||||
|
|
@ -1194,7 +1199,7 @@ public:
|
|||
struct ggml_tensor* c_crossattn = get_clip_vision_output(work_ctx, init_image, true, -1, zero_out_masked);
|
||||
|
||||
// c_concat
|
||||
struct ggml_tensor* c_concat = NULL;
|
||||
struct ggml_tensor* c_concat = nullptr;
|
||||
{
|
||||
if (zero_out_masked) {
|
||||
c_concat = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width / 8, height / 8, 4, 1);
|
||||
|
|
@ -1206,10 +1211,10 @@ public:
|
|||
sd_image_f32_t image = sd_image_t_to_sd_image_f32_t(init_image);
|
||||
sd_image_f32_t resized_image = resize_sd_image_f32_t(image, width, height);
|
||||
free(image.data);
|
||||
image.data = NULL;
|
||||
image.data = nullptr;
|
||||
sd_image_f32_to_tensor(resized_image, init_img, false);
|
||||
free(resized_image.data);
|
||||
resized_image.data = NULL;
|
||||
resized_image.data = nullptr;
|
||||
} else {
|
||||
sd_image_to_tensor(init_image, init_img);
|
||||
}
|
||||
|
|
@ -1226,7 +1231,7 @@ public:
|
|||
}
|
||||
|
||||
// y
|
||||
struct ggml_tensor* y = NULL;
|
||||
struct ggml_tensor* y = nullptr;
|
||||
{
|
||||
y = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, diffusion_model->get_adm_in_channels());
|
||||
int out_dim = 256;
|
||||
|
|
@ -1245,7 +1250,7 @@ public:
|
|||
if (diffusion_model->get_desc() == "Wan2.2-TI2V-5B") {
|
||||
auto new_timesteps = std::vector<float>(init_latent->ne[2], timesteps[0]);
|
||||
|
||||
if (denoise_mask != NULL) {
|
||||
if (denoise_mask != nullptr) {
|
||||
float value = ggml_tensor_get_f32(denoise_mask, 0, 0, 0, 0);
|
||||
if (value == 0.f) {
|
||||
new_timesteps[0] = 0.f;
|
||||
|
|
@ -1292,8 +1297,8 @@ public:
|
|||
SDCondition id_cond,
|
||||
std::vector<ggml_tensor*> ref_latents = {},
|
||||
bool increase_ref_index = false,
|
||||
ggml_tensor* denoise_mask = NULL,
|
||||
ggml_tensor* vace_context = NULL,
|
||||
ggml_tensor* denoise_mask = nullptr,
|
||||
ggml_tensor* vace_context = nullptr,
|
||||
float vace_strength = 1.f) {
|
||||
if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) {
|
||||
LOG_WARN("timestep shifting is only supported for SDXL models!");
|
||||
|
|
@ -1320,15 +1325,15 @@ public:
|
|||
|
||||
struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, x);
|
||||
|
||||
bool has_unconditioned = img_cfg_scale != 1.0 && uncond.c_crossattn != NULL;
|
||||
bool has_img_cond = cfg_scale != img_cfg_scale && img_cond.c_crossattn != NULL;
|
||||
bool has_unconditioned = img_cfg_scale != 1.0 && uncond.c_crossattn != nullptr;
|
||||
bool has_img_cond = cfg_scale != img_cfg_scale && img_cond.c_crossattn != nullptr;
|
||||
bool has_skiplayer = slg_scale != 0.0 && skip_layers.size() > 0;
|
||||
|
||||
// denoise wrapper
|
||||
struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x);
|
||||
struct ggml_tensor* out_uncond = NULL;
|
||||
struct ggml_tensor* out_skip = NULL;
|
||||
struct ggml_tensor* out_img_cond = NULL;
|
||||
struct ggml_tensor* out_uncond = nullptr;
|
||||
struct ggml_tensor* out_skip = nullptr;
|
||||
struct ggml_tensor* out_img_cond = nullptr;
|
||||
|
||||
if (has_unconditioned) {
|
||||
out_uncond = ggml_dup_tensor(work_ctx, x);
|
||||
|
|
@ -1386,7 +1391,7 @@ public:
|
|||
|
||||
std::vector<struct ggml_tensor*> controls;
|
||||
|
||||
if (control_hint != NULL && control_net != NULL) {
|
||||
if (control_hint != nullptr && control_net != nullptr) {
|
||||
control_net->compute(n_threads, noised_input, control_hint, timesteps, cond.c_crossattn, cond.c_vector);
|
||||
controls = control_net->controls;
|
||||
// print_ggml_tensor(controls[12]);
|
||||
|
|
@ -1421,10 +1426,10 @@ public:
|
|||
&out_cond);
|
||||
}
|
||||
|
||||
float* negative_data = NULL;
|
||||
float* negative_data = nullptr;
|
||||
if (has_unconditioned) {
|
||||
// uncond
|
||||
if (control_hint != NULL && control_net != NULL) {
|
||||
if (control_hint != nullptr && control_net != nullptr) {
|
||||
control_net->compute(n_threads, noised_input, control_hint, timesteps, uncond.c_crossattn, uncond.c_vector);
|
||||
controls = control_net->controls;
|
||||
}
|
||||
|
|
@ -1438,7 +1443,7 @@ public:
|
|||
negative_data = (float*)out_uncond->data;
|
||||
}
|
||||
|
||||
float* img_cond_data = NULL;
|
||||
float* img_cond_data = nullptr;
|
||||
if (has_img_cond) {
|
||||
diffusion_params.context = img_cond.c_crossattn;
|
||||
diffusion_params.c_concat = img_cond.c_concat;
|
||||
|
|
@ -1451,7 +1456,7 @@ public:
|
|||
|
||||
int step_count = sigmas.size();
|
||||
bool is_skiplayer_step = has_skiplayer && step > (int)(guidance.slg.layer_start * step_count) && step < (int)(guidance.slg.layer_end * step_count);
|
||||
float* skip_layer_data = NULL;
|
||||
float* skip_layer_data = nullptr;
|
||||
if (is_skiplayer_step) {
|
||||
LOG_DEBUG("Skipping layers at step %d\n", step);
|
||||
// skip layer (same as conditionned)
|
||||
|
|
@ -1567,7 +1572,11 @@ public:
|
|||
}
|
||||
}
|
||||
} else {
|
||||
ggml_tensor_scale(latent, scale_factor);
|
||||
ggml_tensor_iter(latent, [&](ggml_tensor* latent, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||
float value = ggml_tensor_get_f32(latent, i0, i1, i2, i3);
|
||||
value = (value - shift_factor) * scale_factor;
|
||||
ggml_tensor_set_f32(latent, value, i0, i1, i2, i3);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1607,7 +1616,11 @@ public:
|
|||
}
|
||||
}
|
||||
} else {
|
||||
ggml_tensor_scale(latent, 1.0f / scale_factor);
|
||||
ggml_tensor_iter(latent, [&](ggml_tensor* latent, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||
float value = ggml_tensor_get_f32(latent, i0, i1, i2, i3);
|
||||
value = (value / scale_factor) + shift_factor;
|
||||
ggml_tensor_set_f32(latent, value, i0, i1, i2, i3);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1642,7 +1655,7 @@ public:
|
|||
|
||||
ggml_tensor* vae_encode(ggml_context* work_ctx, ggml_tensor* x, bool encode_video = false) {
|
||||
int64_t t0 = ggml_time_ms();
|
||||
ggml_tensor* result = NULL;
|
||||
ggml_tensor* result = nullptr;
|
||||
int W = x->ne[0] / 8;
|
||||
int H = x->ne[1] / 8;
|
||||
if (vae_tiling_params.enabled && !encode_video) {
|
||||
|
|
@ -1689,7 +1702,7 @@ public:
|
|||
if (vae_tiling_params.enabled && !encode_video) {
|
||||
// split latent in 32x32 tiles and compute in several steps
|
||||
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
||||
tae_first_stage->compute(n_threads, in, false, &out, NULL);
|
||||
tae_first_stage->compute(n_threads, in, false, &out, nullptr);
|
||||
};
|
||||
sd_tiling(x, result, 8, 64, 0.5f, on_tiling);
|
||||
} else {
|
||||
|
|
@ -1764,7 +1777,7 @@ public:
|
|||
int64_t W = x->ne[0] * 8;
|
||||
int64_t H = x->ne[1] * 8;
|
||||
int64_t C = 3;
|
||||
ggml_tensor* result = NULL;
|
||||
ggml_tensor* result = nullptr;
|
||||
if (decode_video) {
|
||||
int T = x->ne[2];
|
||||
if (sd_version_is_wan(version)) {
|
||||
|
|
@ -1804,7 +1817,7 @@ public:
|
|||
|
||||
// split latent in 32x32 tiles and compute in several steps
|
||||
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
||||
first_stage_model->compute(n_threads, in, true, &out, NULL);
|
||||
first_stage_model->compute(n_threads, in, true, &out, nullptr);
|
||||
};
|
||||
sd_tiling_non_square(x, result, 8, tile_size_x, tile_size_y, tile_overlap, on_tiling);
|
||||
} else {
|
||||
|
|
@ -1981,7 +1994,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
|||
char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
||||
char* buf = (char*)malloc(4096);
|
||||
if (!buf)
|
||||
return NULL;
|
||||
return nullptr;
|
||||
buf[0] = '\0';
|
||||
|
||||
snprintf(buf + strlen(buf), 4096 - strlen(buf),
|
||||
|
|
@ -2001,7 +2014,6 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
|||
"embedding_dir: %s\n"
|
||||
"photo_maker_path: %s\n"
|
||||
"vae_decode_only: %s\n"
|
||||
"vae_tiling: %s\n"
|
||||
"free_params_immediately: %s\n"
|
||||
"n_threads: %d\n"
|
||||
"wtype: %s\n"
|
||||
|
|
@ -2065,7 +2077,7 @@ void sd_sample_params_init(sd_sample_params_t* sample_params) {
|
|||
char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
|
||||
char* buf = (char*)malloc(4096);
|
||||
if (!buf)
|
||||
return NULL;
|
||||
return nullptr;
|
||||
buf[0] = '\0';
|
||||
|
||||
snprintf(buf + strlen(buf), 4096 - strlen(buf),
|
||||
|
|
@ -2117,7 +2129,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
|
|||
char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
||||
char* buf = (char*)malloc(4096);
|
||||
if (!buf)
|
||||
return NULL;
|
||||
return nullptr;
|
||||
buf[0] = '\0';
|
||||
|
||||
char* sample_params_str = sd_sample_params_to_str(&sd_img_gen_params->sample_params);
|
||||
|
|
@ -2174,40 +2186,40 @@ void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params) {
|
|||
}
|
||||
|
||||
struct sd_ctx_t {
|
||||
StableDiffusionGGML* sd = NULL;
|
||||
StableDiffusionGGML* sd = nullptr;
|
||||
};
|
||||
|
||||
sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) {
|
||||
sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
|
||||
if (sd_ctx == NULL) {
|
||||
return NULL;
|
||||
if (sd_ctx == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
sd_ctx->sd = new StableDiffusionGGML();
|
||||
if (sd_ctx->sd == NULL) {
|
||||
if (sd_ctx->sd == nullptr) {
|
||||
free(sd_ctx);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!sd_ctx->sd->init(sd_ctx_params)) {
|
||||
delete sd_ctx->sd;
|
||||
sd_ctx->sd = NULL;
|
||||
sd_ctx->sd = nullptr;
|
||||
free(sd_ctx);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
return sd_ctx;
|
||||
}
|
||||
|
||||
void free_sd_ctx(sd_ctx_t* sd_ctx) {
|
||||
if (sd_ctx->sd != NULL) {
|
||||
if (sd_ctx->sd != nullptr) {
|
||||
delete sd_ctx->sd;
|
||||
sd_ctx->sd = NULL;
|
||||
sd_ctx->sd = nullptr;
|
||||
}
|
||||
free(sd_ctx);
|
||||
}
|
||||
|
||||
enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) {
|
||||
if (sd_ctx != NULL && sd_ctx->sd != NULL) {
|
||||
if (sd_ctx != nullptr && sd_ctx->sd != nullptr) {
|
||||
SDVersion version = sd_ctx->sd->version;
|
||||
if (sd_version_is_dit(version))
|
||||
return EULER;
|
||||
|
|
@ -2238,13 +2250,13 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
std::vector<sd_image_t*> ref_images,
|
||||
std::vector<ggml_tensor*> ref_latents,
|
||||
bool increase_ref_index,
|
||||
ggml_tensor* concat_latent = NULL,
|
||||
ggml_tensor* denoise_mask = NULL) {
|
||||
ggml_tensor* concat_latent = nullptr,
|
||||
ggml_tensor* denoise_mask = nullptr) {
|
||||
if (seed < 0) {
|
||||
// Generally, when using the provided command line, the seed is always >0.
|
||||
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
|
||||
// by a third party with a seed <0, let's incorporate randomization here.
|
||||
srand((int)time(NULL));
|
||||
srand((int)time(nullptr));
|
||||
seed = rand();
|
||||
}
|
||||
|
||||
|
|
@ -2265,7 +2277,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
|
||||
// Photo Maker
|
||||
std::string prompt_text_only;
|
||||
ggml_tensor* init_img = NULL;
|
||||
ggml_tensor* init_img = nullptr;
|
||||
SDCondition id_cond;
|
||||
std::vector<bool> class_tokens_mask;
|
||||
|
||||
|
|
@ -2300,7 +2312,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
sd_image_f32_t id_image = sd_image_t_to_sd_image_f32_t(pm_params.id_images[i]);
|
||||
sd_image_f32_t processed_id_image = clip_preprocess(id_image, clip_image_size, clip_image_size);
|
||||
free(id_image.data);
|
||||
id_image.data = NULL;
|
||||
id_image.data = nullptr;
|
||||
processed_id_images.push_back(processed_id_image);
|
||||
}
|
||||
|
||||
|
|
@ -2311,7 +2323,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
|
||||
for (auto& image : processed_id_images) {
|
||||
free(image.data);
|
||||
image.data = NULL;
|
||||
image.data = nullptr;
|
||||
}
|
||||
processed_id_images.clear();
|
||||
|
||||
|
|
@ -2323,7 +2335,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
condition_params);
|
||||
id_cond = std::get<0>(cond_tup);
|
||||
class_tokens_mask = std::get<1>(cond_tup); //
|
||||
struct ggml_tensor* id_embeds = NULL;
|
||||
struct ggml_tensor* id_embeds = nullptr;
|
||||
if (pmv2 && pm_params.id_embed_path != nullptr) {
|
||||
id_embeds = load_tensor_from_file(work_ctx, pm_params.id_embed_path);
|
||||
// print_ggml_tensor(id_embeds, true, "id_embeds:");
|
||||
|
|
@ -2349,7 +2361,6 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
}
|
||||
|
||||
// Get learned condition
|
||||
t0 = ggml_time_ms();
|
||||
condition_params.text = prompt;
|
||||
condition_params.zero_out_masked = false;
|
||||
SDCondition cond = sd_ctx->sd->cond_stage_model->get_learned_condition(work_ctx,
|
||||
|
|
@ -2377,8 +2388,8 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
}
|
||||
|
||||
// Control net hint
|
||||
struct ggml_tensor* image_hint = NULL;
|
||||
if (control_image.data != NULL) {
|
||||
struct ggml_tensor* image_hint = nullptr;
|
||||
if (control_image.data != nullptr) {
|
||||
image_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
||||
sd_image_to_tensor(control_image, image_hint);
|
||||
}
|
||||
|
|
@ -2397,8 +2408,8 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
int H = height / 8;
|
||||
LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]);
|
||||
|
||||
struct ggml_tensor* control_latent = NULL;
|
||||
if (sd_version_is_control(sd_ctx->sd->version) && image_hint != NULL) {
|
||||
struct ggml_tensor* control_latent = nullptr;
|
||||
if (sd_version_is_control(sd_ctx->sd->version) && image_hint != nullptr) {
|
||||
control_latent = sd_ctx->sd->encode_first_stage(work_ctx, image_hint);
|
||||
ggml_tensor_scale(control_latent, control_strength);
|
||||
}
|
||||
|
|
@ -2436,8 +2447,8 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
}
|
||||
}
|
||||
|
||||
if (sd_ctx->sd->version == VERSION_FLEX_2 && control_latent != NULL && sd_ctx->sd->control_net == NULL) {
|
||||
bool no_inpaint = concat_latent == NULL;
|
||||
if (sd_ctx->sd->version == VERSION_FLEX_2 && control_latent != nullptr && sd_ctx->sd->control_net == nullptr) {
|
||||
bool no_inpaint = concat_latent == nullptr;
|
||||
if (no_inpaint) {
|
||||
concat_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, init_latent->ne[0], init_latent->ne[1], mask_channels + init_latent->ne[2], 1);
|
||||
}
|
||||
|
|
@ -2456,33 +2467,33 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if (concat_latent == NULL) {
|
||||
} else if (concat_latent == nullptr) {
|
||||
concat_latent = empty_latent;
|
||||
}
|
||||
cond.c_concat = concat_latent;
|
||||
uncond.c_concat = empty_latent;
|
||||
denoise_mask = NULL;
|
||||
denoise_mask = nullptr;
|
||||
} else if (sd_version_is_unet_edit(sd_ctx->sd->version)) {
|
||||
auto empty_latent = ggml_dup_tensor(work_ctx, init_latent);
|
||||
ggml_set_f32(empty_latent, 0);
|
||||
uncond.c_concat = empty_latent;
|
||||
cond.c_concat = ref_latents[0];
|
||||
if (cond.c_concat == NULL) {
|
||||
if (cond.c_concat == nullptr) {
|
||||
cond.c_concat = empty_latent;
|
||||
}
|
||||
} else if (sd_version_is_control(sd_ctx->sd->version)) {
|
||||
auto empty_latent = ggml_dup_tensor(work_ctx, init_latent);
|
||||
ggml_set_f32(empty_latent, 0);
|
||||
uncond.c_concat = empty_latent;
|
||||
if (sd_ctx->sd->control_net == NULL) {
|
||||
if (sd_ctx->sd->control_net == nullptr) {
|
||||
cond.c_concat = control_latent;
|
||||
}
|
||||
if (cond.c_concat == NULL) {
|
||||
if (cond.c_concat == nullptr) {
|
||||
cond.c_concat = empty_latent;
|
||||
}
|
||||
}
|
||||
SDCondition img_cond;
|
||||
if (uncond.c_crossattn != NULL &&
|
||||
if (uncond.c_crossattn != nullptr &&
|
||||
(sd_version_is_inpaint_or_unet_edit(sd_ctx->sd->version) && guidance.txt_cfg != guidance.img_cfg)) {
|
||||
img_cond = SDCondition(uncond.c_crossattn, uncond.c_vector, cond.c_concat);
|
||||
}
|
||||
|
|
@ -2543,7 +2554,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
t1 = ggml_time_ms();
|
||||
struct ggml_tensor* img = sd_ctx->sd->decode_first_stage(work_ctx, final_latents[i] /* x_0 */);
|
||||
// print_ggml_tensor(img);
|
||||
if (img != NULL) {
|
||||
if (img != nullptr) {
|
||||
decoded_images.push_back(img);
|
||||
}
|
||||
int64_t t2 = ggml_time_ms();
|
||||
|
|
@ -2556,9 +2567,9 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
sd_ctx->sd->first_stage_model->free_params_buffer();
|
||||
}
|
||||
sd_image_t* result_images = (sd_image_t*)calloc(batch_count, sizeof(sd_image_t));
|
||||
if (result_images == NULL) {
|
||||
if (result_images == nullptr) {
|
||||
ggml_free(work_ctx);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < decoded_images.size(); i++) {
|
||||
|
|
@ -2623,35 +2634,35 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||
model_version_to_str[sd_ctx->sd->version],
|
||||
width,
|
||||
height);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
} else if (width % 64 || height % 64) {
|
||||
LOG_ERROR("Image dimensions must be must be a multiple of 64 on each axis for %s models. (Got %dx%d)",
|
||||
model_version_to_str[sd_ctx->sd->version],
|
||||
width,
|
||||
height);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
LOG_DEBUG("generate_image %dx%d", width, height);
|
||||
if (sd_ctx == NULL || sd_img_gen_params == NULL) {
|
||||
return NULL;
|
||||
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
// LOG_DEBUG("mem_size %u ", params.mem_size);
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
if (!work_ctx) {
|
||||
LOG_ERROR("ggml_init() failed");
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int64_t seed = sd_img_gen_params->seed;
|
||||
if (seed < 0) {
|
||||
srand((int)time(NULL));
|
||||
srand((int)time(nullptr));
|
||||
seed = rand();
|
||||
}
|
||||
sd_ctx->sd->rng->manual_seed(seed);
|
||||
|
|
@ -2663,9 +2674,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||
sd_ctx->sd->init_scheduler(sd_img_gen_params->sample_params.scheduler);
|
||||
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
|
||||
|
||||
ggml_tensor* init_latent = NULL;
|
||||
ggml_tensor* concat_latent = NULL;
|
||||
ggml_tensor* denoise_mask = NULL;
|
||||
ggml_tensor* init_latent = nullptr;
|
||||
ggml_tensor* concat_latent = nullptr;
|
||||
ggml_tensor* denoise_mask = nullptr;
|
||||
if (sd_img_gen_params->init_image.data) {
|
||||
LOG_INFO("IMG2IMG");
|
||||
|
||||
|
|
@ -2692,7 +2703,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||
} else if (sd_ctx->sd->version == VERSION_FLEX_2) {
|
||||
mask_channels = 1 + init_latent->ne[2];
|
||||
}
|
||||
ggml_tensor* masked_latent = NULL;
|
||||
ggml_tensor* masked_latent = nullptr;
|
||||
|
||||
if (sd_ctx->sd->version != VERSION_FLEX_2) {
|
||||
// most inpaint models mask before vae
|
||||
|
|
@ -2835,7 +2846,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||
ref_latents.push_back(latent);
|
||||
}
|
||||
|
||||
if (sd_img_gen_params->init_image.data != NULL || sd_img_gen_params->ref_images_count > 0) {
|
||||
if (sd_img_gen_params->init_image.data != nullptr || sd_img_gen_params->ref_images_count > 0) {
|
||||
size_t t1 = ggml_time_ms();
|
||||
LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
|
||||
}
|
||||
|
|
@ -2877,8 +2888,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||
}
|
||||
|
||||
SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* sd_vid_gen_params, int* num_frames_out) {
|
||||
if (sd_ctx == NULL || sd_vid_gen_params == NULL) {
|
||||
return NULL;
|
||||
if (sd_ctx == nullptr || sd_vid_gen_params == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string prompt = SAFE_STR(sd_vid_gen_params->prompt);
|
||||
|
|
@ -2915,24 +2926,23 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
}
|
||||
}
|
||||
LOG_DEBUG("switching from high noise model at step %d", high_noise_sample_steps);
|
||||
sample_steps = total_steps - high_noise_sample_steps;
|
||||
}
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
// LOG_DEBUG("mem_size %u ", params.mem_size);
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
if (!work_ctx) {
|
||||
LOG_ERROR("ggml_init() failed");
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int64_t seed = sd_vid_gen_params->seed;
|
||||
if (seed < 0) {
|
||||
seed = (int)time(NULL);
|
||||
seed = (int)time(nullptr);
|
||||
}
|
||||
|
||||
sd_ctx->sd->rng->manual_seed(seed);
|
||||
|
|
@ -2942,11 +2952,11 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
// Apply lora
|
||||
prompt = sd_ctx->sd->apply_loras_from_prompt(prompt);
|
||||
|
||||
ggml_tensor* init_latent = NULL;
|
||||
ggml_tensor* clip_vision_output = NULL;
|
||||
ggml_tensor* concat_latent = NULL;
|
||||
ggml_tensor* denoise_mask = NULL;
|
||||
ggml_tensor* vace_context = NULL;
|
||||
ggml_tensor* init_latent = nullptr;
|
||||
ggml_tensor* clip_vision_output = nullptr;
|
||||
ggml_tensor* concat_latent = nullptr;
|
||||
ggml_tensor* denoise_mask = nullptr;
|
||||
ggml_tensor* vace_context = nullptr;
|
||||
int64_t ref_image_num = 0; // for vace
|
||||
if (sd_ctx->sd->diffusion_model->get_desc() == "Wan2.1-I2V-14B" ||
|
||||
sd_ctx->sd->diffusion_model->get_desc() == "Wan2.2-I2V-14B" ||
|
||||
|
|
@ -2962,7 +2972,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
}
|
||||
|
||||
if (sd_ctx->sd->diffusion_model->get_desc() == "Wan2.1-FLF2V-14B") {
|
||||
ggml_tensor* end_image_clip_vision_output = NULL;
|
||||
ggml_tensor* end_image_clip_vision_output = nullptr;
|
||||
if (sd_vid_gen_params->end_image.data) {
|
||||
end_image_clip_vision_output = sd_ctx->sd->get_clip_vision_output(work_ctx, sd_vid_gen_params->end_image, false, -2);
|
||||
} else {
|
||||
|
|
@ -3043,7 +3053,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
sd_ctx->sd->diffusion_model->get_desc() == "Wan2.x-VACE-14B") {
|
||||
LOG_INFO("VACE");
|
||||
int64_t t1 = ggml_time_ms();
|
||||
ggml_tensor* ref_image_latent = NULL;
|
||||
ggml_tensor* ref_image_latent = nullptr;
|
||||
if (sd_vid_gen_params->init_image.data) {
|
||||
ggml_tensor* ref_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
||||
sd_image_to_tensor(sd_vid_gen_params->init_image, ref_img);
|
||||
|
|
@ -3116,7 +3126,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
LOG_INFO("encode_first_stage completed, taking %" PRId64 " ms", t2 - t1);
|
||||
}
|
||||
|
||||
if (init_latent == NULL) {
|
||||
if (init_latent == nullptr) {
|
||||
init_latent = generate_init_latent(sd_ctx, work_ctx, width, height, frames, true);
|
||||
}
|
||||
|
||||
|
|
@ -3179,7 +3189,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
cond,
|
||||
uncond,
|
||||
{},
|
||||
NULL,
|
||||
nullptr,
|
||||
0,
|
||||
sd_vid_gen_params->high_noise_sample_params.guidance,
|
||||
sd_vid_gen_params->high_noise_sample_params.eta,
|
||||
|
|
@ -3199,7 +3209,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
if (sd_ctx->sd->free_params_immediately) {
|
||||
sd_ctx->sd->high_noise_diffusion_model->free_params_buffer();
|
||||
}
|
||||
noise = NULL;
|
||||
noise = nullptr;
|
||||
}
|
||||
|
||||
// Sample
|
||||
|
|
@ -3215,7 +3225,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
cond,
|
||||
uncond,
|
||||
{},
|
||||
NULL,
|
||||
nullptr,
|
||||
0,
|
||||
sd_vid_gen_params->sample_params.guidance,
|
||||
sd_vid_gen_params->sample_params.eta,
|
||||
|
|
@ -3261,9 +3271,9 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||
}
|
||||
|
||||
sd_image_t* result_images = (sd_image_t*)calloc(vid->ne[2], sizeof(sd_image_t));
|
||||
if (result_images == NULL) {
|
||||
if (result_images == nullptr) {
|
||||
ggml_free(work_ctx);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
*num_frames_out = vid->ne[2];
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef __T5_HPP__
|
||||
#define __T5_HPP__
|
||||
|
||||
#include <float.h>
|
||||
#include <cfloat>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
|
@ -461,7 +461,7 @@ protected:
|
|||
int64_t hidden_size;
|
||||
float eps;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type wtype = GGML_TYPE_F32;
|
||||
params["weight"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
|
||||
}
|
||||
|
|
@ -472,7 +472,7 @@ public:
|
|||
: hidden_size(hidden_size),
|
||||
eps(eps) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
x = ggml_rms_norm(ctx, x, eps);
|
||||
x = ggml_mul(ctx, x, w);
|
||||
|
|
@ -487,7 +487,7 @@ public:
|
|||
blocks["wo"] = std::shared_ptr<GGMLBlock>(new Linear(ff_dim, model_dim, false));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [N, n_token, model_dim]
|
||||
auto wi = std::dynamic_pointer_cast<Linear>(blocks["wi"]);
|
||||
auto wo = std::dynamic_pointer_cast<Linear>(blocks["wo"]);
|
||||
|
|
@ -509,7 +509,7 @@ public:
|
|||
blocks["wo"] = std::shared_ptr<GGMLBlock>(new Linear(ff_dim, model_dim, false, false, false, scale));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [N, n_token, model_dim]
|
||||
auto wi_0 = std::dynamic_pointer_cast<Linear>(blocks["wi_0"]);
|
||||
auto wi_1 = std::dynamic_pointer_cast<Linear>(blocks["wi_1"]);
|
||||
|
|
@ -530,7 +530,7 @@ public:
|
|||
blocks["layer_norm"] = std::shared_ptr<GGMLBlock>(new T5LayerNorm(model_dim));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [N, n_token, model_dim]
|
||||
auto DenseReluDense = std::dynamic_pointer_cast<T5DenseGatedActDense>(blocks["DenseReluDense"]);
|
||||
auto layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["layer_norm"]);
|
||||
|
|
@ -582,9 +582,9 @@ public:
|
|||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* mask = NULL,
|
||||
struct ggml_tensor* relative_position_bucket = NULL) {
|
||||
struct ggml_tensor* past_bias = nullptr,
|
||||
struct ggml_tensor* mask = nullptr,
|
||||
struct ggml_tensor* relative_position_bucket = nullptr) {
|
||||
auto q_proj = std::dynamic_pointer_cast<Linear>(blocks["q"]);
|
||||
auto k_proj = std::dynamic_pointer_cast<Linear>(blocks["k"]);
|
||||
auto v_proj = std::dynamic_pointer_cast<Linear>(blocks["v"]);
|
||||
|
|
@ -597,11 +597,11 @@ public:
|
|||
auto k = k_proj->forward(ctx, x);
|
||||
auto v = v_proj->forward(ctx, x);
|
||||
|
||||
if (using_relative_attention_bias && relative_position_bucket != NULL) {
|
||||
if (using_relative_attention_bias && relative_position_bucket != nullptr) {
|
||||
past_bias = compute_bias(ctx, relative_position_bucket);
|
||||
}
|
||||
if (past_bias != NULL) {
|
||||
if (mask != NULL) {
|
||||
if (past_bias != nullptr) {
|
||||
if (mask != nullptr) {
|
||||
mask = ggml_repeat(ctx, mask, past_bias);
|
||||
mask = ggml_add(ctx, mask, past_bias);
|
||||
} else {
|
||||
|
|
@ -632,9 +632,9 @@ public:
|
|||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* mask = NULL,
|
||||
struct ggml_tensor* relative_position_bucket = NULL) {
|
||||
struct ggml_tensor* past_bias = nullptr,
|
||||
struct ggml_tensor* mask = nullptr,
|
||||
struct ggml_tensor* relative_position_bucket = nullptr) {
|
||||
// x: [N, n_token, model_dim]
|
||||
auto SelfAttention = std::dynamic_pointer_cast<T5Attention>(blocks["SelfAttention"]);
|
||||
auto layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["layer_norm"]);
|
||||
|
|
@ -659,9 +659,9 @@ public:
|
|||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* mask = NULL,
|
||||
struct ggml_tensor* relative_position_bucket = NULL) {
|
||||
struct ggml_tensor* past_bias = nullptr,
|
||||
struct ggml_tensor* mask = nullptr,
|
||||
struct ggml_tensor* relative_position_bucket = nullptr) {
|
||||
// x: [N, n_token, model_dim]
|
||||
auto layer_0 = std::dynamic_pointer_cast<T5LayerSelfAttention>(blocks["layer.0"]);
|
||||
auto layer_1 = std::dynamic_pointer_cast<T5LayerFF>(blocks["layer.1"]);
|
||||
|
|
@ -695,9 +695,9 @@ public:
|
|||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* attention_mask = NULL,
|
||||
struct ggml_tensor* relative_position_bucket = NULL) {
|
||||
struct ggml_tensor* past_bias = nullptr,
|
||||
struct ggml_tensor* attention_mask = nullptr,
|
||||
struct ggml_tensor* relative_position_bucket = nullptr) {
|
||||
// x: [N, n_token, model_dim]
|
||||
for (int i = 0; i < num_layers; i++) {
|
||||
auto block = std::dynamic_pointer_cast<T5Block>(blocks["block." + std::to_string(i)]);
|
||||
|
|
@ -743,9 +743,9 @@ public:
|
|||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* past_bias = NULL,
|
||||
struct ggml_tensor* attention_mask = NULL,
|
||||
struct ggml_tensor* relative_position_bucket = NULL) {
|
||||
struct ggml_tensor* past_bias = nullptr,
|
||||
struct ggml_tensor* attention_mask = nullptr,
|
||||
struct ggml_tensor* relative_position_bucket = nullptr) {
|
||||
// input_ids: [N, n_token]
|
||||
|
||||
auto shared = std::dynamic_pointer_cast<Embedding>(blocks["shared"]);
|
||||
|
|
@ -776,7 +776,7 @@ struct T5Runner : public GGMLRunner {
|
|||
model.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "t5";
|
||||
}
|
||||
|
||||
|
|
@ -788,16 +788,16 @@ struct T5Runner : public GGMLRunner {
|
|||
ggml_backend_t backend,
|
||||
struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* relative_position_bucket,
|
||||
struct ggml_tensor* attention_mask = NULL) {
|
||||
struct ggml_tensor* attention_mask = nullptr) {
|
||||
size_t N = input_ids->ne[1];
|
||||
size_t n_token = input_ids->ne[0];
|
||||
|
||||
auto hidden_states = model.forward(ctx, backend, input_ids, NULL, attention_mask, relative_position_bucket); // [N, n_token, model_dim]
|
||||
auto hidden_states = model.forward(ctx, backend, input_ids, nullptr, attention_mask, relative_position_bucket); // [N, n_token, model_dim]
|
||||
return hidden_states;
|
||||
}
|
||||
|
||||
struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* attention_mask = NULL) {
|
||||
struct ggml_tensor* attention_mask = nullptr) {
|
||||
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
||||
|
||||
input_ids = to_backend(input_ids);
|
||||
|
|
@ -829,7 +829,7 @@ struct T5Runner : public GGMLRunner {
|
|||
struct ggml_tensor* input_ids,
|
||||
struct ggml_tensor* attention_mask,
|
||||
ggml_tensor** output,
|
||||
ggml_context* output_ctx = NULL) {
|
||||
ggml_context* output_ctx = nullptr) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(input_ids, attention_mask);
|
||||
};
|
||||
|
|
@ -968,11 +968,11 @@ struct T5Embedder {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
{
|
||||
std::string text("a lovely cat");
|
||||
|
|
@ -987,7 +987,7 @@ struct T5Embedder {
|
|||
printf("\n");
|
||||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens);
|
||||
auto attention_mask = vector_to_ggml_tensor(work_ctx, masks);
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
model.compute(8, input_ids, attention_mask, &out, work_ctx);
|
||||
|
|
@ -1022,7 +1022,7 @@ struct T5Embedder {
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<T5Embedder> t5 = std::shared_ptr<T5Embedder>(new T5Embedder(backend, false, tensor_types, "", true));
|
||||
std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, false, tensor_types, "", true);
|
||||
|
||||
t5->alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [n, n_in, h, w]
|
||||
// return: [n, n_out, h, w]
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public:
|
|||
blocks[std::to_string(index++)] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, z_channels, {3, 3}, {1, 1}, {1, 1}));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [n, in_channels, h, w]
|
||||
// return: [n, z_channels, h/8, w/8]
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ public:
|
|||
blocks[std::to_string(index++)] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {1, 1}, {1, 1}));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* z) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* z) override {
|
||||
// z: [n, z_channels, h, w]
|
||||
// return: [n, out_channels, h*8, w*8]
|
||||
|
||||
|
|
@ -218,7 +218,7 @@ struct TinyAutoEncoder : public GGMLRunner {
|
|||
}
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "taesd";
|
||||
}
|
||||
|
||||
|
|
@ -261,7 +261,7 @@ struct TinyAutoEncoder : public GGMLRunner {
|
|||
struct ggml_tensor* z,
|
||||
bool decode_graph,
|
||||
struct ggml_tensor** output,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_context* output_ctx = nullptr) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(z, decode_graph);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -384,8 +384,8 @@ public:
|
|||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* c_concat = NULL,
|
||||
struct ggml_tensor* y = NULL,
|
||||
struct ggml_tensor* c_concat = nullptr,
|
||||
struct ggml_tensor* y = nullptr,
|
||||
int num_video_frames = -1,
|
||||
std::vector<struct ggml_tensor*> controls = {},
|
||||
float control_strength = 0.f) {
|
||||
|
|
@ -395,20 +395,20 @@ public:
|
|||
// c_concat: [N, in_channels, h, w] or [1, in_channels, h, w]
|
||||
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
||||
// return: [N, out_channels, h, w]
|
||||
if (context != NULL) {
|
||||
if (context != nullptr) {
|
||||
if (context->ne[2] != x->ne[3]) {
|
||||
context = ggml_repeat(ctx, context, ggml_new_tensor_3d(ctx, GGML_TYPE_F32, context->ne[0], context->ne[1], x->ne[3]));
|
||||
}
|
||||
}
|
||||
|
||||
if (c_concat != NULL) {
|
||||
if (c_concat != nullptr) {
|
||||
if (c_concat->ne[3] != x->ne[3]) {
|
||||
c_concat = ggml_repeat(ctx, c_concat, x);
|
||||
}
|
||||
x = ggml_concat(ctx, x, c_concat, 2);
|
||||
}
|
||||
|
||||
if (y != NULL) {
|
||||
if (y != nullptr) {
|
||||
if (y->ne[1] != x->ne[3]) {
|
||||
y = ggml_repeat(ctx, y, ggml_new_tensor_2d(ctx, GGML_TYPE_F32, y->ne[0], x->ne[3]));
|
||||
}
|
||||
|
|
@ -428,7 +428,7 @@ public:
|
|||
emb = time_embed_2->forward(ctx, emb); // [N, time_embed_dim]
|
||||
|
||||
// SDXL/SVD
|
||||
if (y != NULL) {
|
||||
if (y != nullptr) {
|
||||
auto label_embed_0 = std::dynamic_pointer_cast<Linear>(blocks["label_emb.0.0"]);
|
||||
auto label_embed_2 = std::dynamic_pointer_cast<Linear>(blocks["label_emb.0.2"]);
|
||||
|
||||
|
|
@ -562,7 +562,7 @@ struct UNetModelRunner : public GGMLRunner {
|
|||
}
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "unet";
|
||||
}
|
||||
|
||||
|
|
@ -573,8 +573,8 @@ struct UNetModelRunner : public GGMLRunner {
|
|||
struct ggml_cgraph* build_graph(struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* c_concat = NULL,
|
||||
struct ggml_tensor* y = NULL,
|
||||
struct ggml_tensor* c_concat = nullptr,
|
||||
struct ggml_tensor* y = nullptr,
|
||||
int num_video_frames = -1,
|
||||
std::vector<struct ggml_tensor*> controls = {},
|
||||
float control_strength = 0.f) {
|
||||
|
|
@ -619,8 +619,8 @@ struct UNetModelRunner : public GGMLRunner {
|
|||
int num_video_frames = -1,
|
||||
std::vector<struct ggml_tensor*> controls = {},
|
||||
float control_strength = 0.f,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) {
|
||||
// x: [N, in_channels, h, w]
|
||||
// timesteps: [N, ]
|
||||
// context: [N, max_position, hidden_size]([N, 77, 768]) or [1, max_position, hidden_size]
|
||||
|
|
@ -636,11 +636,11 @@ struct UNetModelRunner : public GGMLRunner {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
{
|
||||
// CPU, num_video_frames = 1, x{num_video_frames, 8, 8, 8}: Pass
|
||||
|
|
@ -663,10 +663,10 @@ struct UNetModelRunner : public GGMLRunner {
|
|||
ggml_set_f32(y, 0.5f);
|
||||
// print_ggml_tensor(y);
|
||||
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, timesteps, context, NULL, y, num_video_frames, {}, 0.f, &out, work_ctx);
|
||||
compute(8, x, timesteps, context, nullptr, y, num_video_frames, {}, 0.f, &out, work_ctx);
|
||||
int t1 = ggml_time_ms();
|
||||
|
||||
print_ggml_tensor(out);
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "stable-diffusion.h"
|
||||
|
||||
struct UpscalerGGML {
|
||||
ggml_backend_t backend = NULL; // general backend
|
||||
ggml_backend_t backend = nullptr; // general backend
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
||||
std::string esrgan_path;
|
||||
|
|
@ -63,7 +63,7 @@ struct UpscalerGGML {
|
|||
|
||||
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) {
|
||||
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
|
||||
sd_image_t upscaled_image = {0, 0, 0, NULL};
|
||||
sd_image_t upscaled_image = {0, 0, 0, nullptr};
|
||||
int output_width = (int)input_image.width * esrgan_upscaler->scale;
|
||||
int output_height = (int)input_image.height * esrgan_upscaler->scale;
|
||||
LOG_INFO("upscaling from (%i x %i) to (%i x %i)",
|
||||
|
|
@ -71,7 +71,7 @@ struct UpscalerGGML {
|
|||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
// draft context
|
||||
|
|
@ -107,7 +107,7 @@ struct UpscalerGGML {
|
|||
};
|
||||
|
||||
struct upscaler_ctx_t {
|
||||
UpscalerGGML* upscaler = NULL;
|
||||
UpscalerGGML* upscaler = nullptr;
|
||||
};
|
||||
|
||||
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
||||
|
|
@ -115,21 +115,21 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
|||
bool direct,
|
||||
int n_threads) {
|
||||
upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
|
||||
if (upscaler_ctx == NULL) {
|
||||
return NULL;
|
||||
if (upscaler_ctx == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
std::string esrgan_path(esrgan_path_c_str);
|
||||
|
||||
upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct);
|
||||
if (upscaler_ctx->upscaler == NULL) {
|
||||
return NULL;
|
||||
if (upscaler_ctx->upscaler == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu, n_threads)) {
|
||||
delete upscaler_ctx->upscaler;
|
||||
upscaler_ctx->upscaler = NULL;
|
||||
upscaler_ctx->upscaler = nullptr;
|
||||
free(upscaler_ctx);
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
return upscaler_ctx;
|
||||
}
|
||||
|
|
@ -139,16 +139,16 @@ sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_
|
|||
}
|
||||
|
||||
int get_upscale_factor(upscaler_ctx_t* upscaler_ctx) {
|
||||
if (upscaler_ctx == NULL || upscaler_ctx->upscaler == NULL || upscaler_ctx->upscaler->esrgan_upscaler == NULL) {
|
||||
if (upscaler_ctx == nullptr || upscaler_ctx->upscaler == nullptr || upscaler_ctx->upscaler->esrgan_upscaler == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
return upscaler_ctx->upscaler->esrgan_upscaler->scale;
|
||||
}
|
||||
|
||||
void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx) {
|
||||
if (upscaler_ctx->upscaler != NULL) {
|
||||
if (upscaler_ctx->upscaler != nullptr) {
|
||||
delete upscaler_ctx->upscaler;
|
||||
upscaler_ctx->upscaler = NULL;
|
||||
upscaler_ctx->upscaler = nullptr;
|
||||
}
|
||||
free(upscaler_ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
#include "util.h"
|
||||
#include <stdarg.h>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <codecvt>
|
||||
#include <cstdarg>
|
||||
#include <fstream>
|
||||
#include <locale>
|
||||
#include <sstream>
|
||||
|
|
@ -160,11 +160,11 @@ int32_t sd_get_num_physical_cores() {
|
|||
#elif defined(__APPLE__) && defined(__MACH__)
|
||||
int32_t num_physical_cores;
|
||||
size_t len = sizeof(num_physical_cores);
|
||||
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
|
||||
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, nullptr, 0);
|
||||
if (result == 0) {
|
||||
return num_physical_cores;
|
||||
}
|
||||
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
|
||||
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, nullptr, 0);
|
||||
if (result == 0) {
|
||||
return num_physical_cores;
|
||||
}
|
||||
|
|
@ -175,8 +175,8 @@ int32_t sd_get_num_physical_cores() {
|
|||
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||
}
|
||||
|
||||
static sd_progress_cb_t sd_progress_cb = NULL;
|
||||
void* sd_progress_cb_data = NULL;
|
||||
static sd_progress_cb_t sd_progress_cb = nullptr;
|
||||
void* sd_progress_cb_data = nullptr;
|
||||
|
||||
std::u32string utf8_to_utf32(const std::string& utf8_str) {
|
||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
||||
|
|
@ -293,8 +293,8 @@ std::string trim(const std::string& s) {
|
|||
return rtrim(ltrim(s));
|
||||
}
|
||||
|
||||
static sd_log_cb_t sd_log_cb = NULL;
|
||||
void* sd_log_cb_data = NULL;
|
||||
static sd_log_cb_t sd_log_cb = nullptr;
|
||||
void* sd_log_cb_data = nullptr;
|
||||
|
||||
#define LOG_BUFFER_SIZE 4096
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [N, in_channels, h, w]
|
||||
// t_emb is always None
|
||||
auto norm1 = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm1"]);
|
||||
|
|
@ -76,7 +76,7 @@ public:
|
|||
blocks["proj_out"] = std::shared_ptr<GGMLBlock>(new Conv2d(in_channels, in_channels, {1, 1}));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [N, in_channels, h, w]
|
||||
auto norm = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm"]);
|
||||
auto q_proj = std::dynamic_pointer_cast<Conv2d>(blocks["q"]);
|
||||
|
|
@ -134,7 +134,7 @@ public:
|
|||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x) {
|
||||
struct ggml_tensor* x) override {
|
||||
// timesteps always None
|
||||
// skip_video always False
|
||||
// x: [N, IC, IH, IW]
|
||||
|
|
@ -163,7 +163,7 @@ public:
|
|||
|
||||
class VideoResnetBlock : public ResnetBlock {
|
||||
protected:
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type wtype = get_type(prefix + "mix_factor", tensor_types, GGML_TYPE_F32);
|
||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||
}
|
||||
|
|
@ -182,7 +182,7 @@ public:
|
|||
blocks["time_stack"] = std::shared_ptr<GGMLBlock>(new ResBlock(out_channels, 0, out_channels, {video_kernel_size, 1}, 3, false, true));
|
||||
}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [N, in_channels, h, w] aka [b*t, in_channels, h, w]
|
||||
// return: [N, out_channels, h, w] aka [b*t, out_channels, h, w]
|
||||
// t_emb is always None
|
||||
|
|
@ -548,7 +548,7 @@ struct AutoEncoderKL : public VAE {
|
|||
ae.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
void enable_conv2d_direct() {
|
||||
void enable_conv2d_direct() override {
|
||||
std::vector<GGMLBlock*> blocks;
|
||||
ae.get_all_blocks(blocks);
|
||||
for (auto block : blocks) {
|
||||
|
|
@ -559,7 +559,7 @@ struct AutoEncoderKL : public VAE {
|
|||
}
|
||||
}
|
||||
|
||||
void set_conv2d_scale(float scale) {
|
||||
void set_conv2d_scale(float scale) override {
|
||||
std::vector<GGMLBlock*> blocks;
|
||||
ae.get_all_blocks(blocks);
|
||||
for (auto block : blocks) {
|
||||
|
|
@ -570,11 +570,11 @@ struct AutoEncoderKL : public VAE {
|
|||
}
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "vae";
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) override {
|
||||
ae.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -594,7 +594,7 @@ struct AutoEncoderKL : public VAE {
|
|||
struct ggml_tensor* z,
|
||||
bool decode_graph,
|
||||
struct ggml_tensor** output,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_context* output_ctx = nullptr) override {
|
||||
GGML_ASSERT(!decode_only || decode_graph);
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(z, decode_graph);
|
||||
|
|
@ -607,11 +607,11 @@ struct AutoEncoderKL : public VAE {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
{
|
||||
// CPU, x{1, 3, 64, 64}: Pass
|
||||
|
|
@ -621,7 +621,7 @@ struct AutoEncoderKL : public VAE {
|
|||
auto x = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 64, 64, 3, 2);
|
||||
ggml_set_f32(x, 0.5f);
|
||||
print_ggml_tensor(x);
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, false, &out, work_ctx);
|
||||
|
|
@ -639,7 +639,7 @@ struct AutoEncoderKL : public VAE {
|
|||
auto z = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 8, 8, 4, 1);
|
||||
ggml_set_f32(z, 0.5f);
|
||||
print_ggml_tensor(z);
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, z, true, &out, work_ctx);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
#define __WAN_HPP__
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "common.hpp"
|
||||
#include "flux.hpp"
|
||||
|
|
@ -24,7 +26,7 @@ namespace WAN {
|
|||
std::tuple<int, int, int> dilation;
|
||||
bool bias;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
params["weight"] = ggml_new_tensor_4d(ctx,
|
||||
GGML_TYPE_F16,
|
||||
std::get<2>(kernel_size),
|
||||
|
|
@ -46,17 +48,17 @@ namespace WAN {
|
|||
bool bias = true)
|
||||
: in_channels(in_channels),
|
||||
out_channels(out_channels),
|
||||
kernel_size(kernel_size),
|
||||
stride(stride),
|
||||
padding(padding),
|
||||
dilation(dilation),
|
||||
kernel_size(std::move(kernel_size)),
|
||||
stride(std::move(stride)),
|
||||
padding(std::move(padding)),
|
||||
dilation(std::move(dilation)),
|
||||
bias(bias) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* cache_x = NULL) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* cache_x = nullptr) {
|
||||
// x: [N*IC, ID, IH, IW]
|
||||
// result: x: [N*OC, ID, IH, IW]
|
||||
struct ggml_tensor* w = params["weight"];
|
||||
struct ggml_tensor* b = NULL;
|
||||
struct ggml_tensor* b = nullptr;
|
||||
if (bias) {
|
||||
b = params["bias"];
|
||||
}
|
||||
|
|
@ -68,7 +70,7 @@ namespace WAN {
|
|||
int lp2 = 2 * std::get<0>(padding);
|
||||
int rp2 = 0;
|
||||
|
||||
if (cache_x != NULL && lp2 > 0) {
|
||||
if (cache_x != nullptr && lp2 > 0) {
|
||||
x = ggml_concat(ctx, cache_x, x, 2);
|
||||
lp2 -= (int)cache_x->ne[2];
|
||||
}
|
||||
|
|
@ -85,7 +87,7 @@ namespace WAN {
|
|||
protected:
|
||||
int64_t dim;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
ggml_type wtype = GGML_TYPE_F32;
|
||||
params["gamma"] = ggml_new_tensor_1d(ctx, wtype, dim);
|
||||
}
|
||||
|
|
@ -94,7 +96,7 @@ namespace WAN {
|
|||
RMS_norm(int64_t dim)
|
||||
: dim(dim) {}
|
||||
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
|
||||
// x: [N*IC, ID, IH, IW], IC == dim
|
||||
// assert N == 1
|
||||
|
||||
|
|
@ -159,12 +161,12 @@ namespace WAN {
|
|||
int idx = feat_idx;
|
||||
feat_idx += 1;
|
||||
if (chunk_idx == 0) {
|
||||
// feat_cache[idx] == NULL, pass
|
||||
// feat_cache[idx] == nullptr, pass
|
||||
} else {
|
||||
auto time_conv = std::dynamic_pointer_cast<CausalConv3d>(blocks["time_conv"]);
|
||||
|
||||
auto cache_x = ggml_slice(ctx, x, 2, -CACHE_T, x->ne[2]);
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != NULL) { // chunk_idx >= 2
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != nullptr) { // chunk_idx >= 2
|
||||
// cache last frame of last two chunk
|
||||
cache_x = ggml_concat(ctx,
|
||||
ggml_slice(ctx, feat_cache[idx], 2, -1, feat_cache[idx]->ne[2]),
|
||||
|
|
@ -209,7 +211,7 @@ namespace WAN {
|
|||
if (mode == "downsample3d") {
|
||||
if (feat_cache.size() > 0) {
|
||||
int idx = feat_idx;
|
||||
if (feat_cache[idx] == NULL) {
|
||||
if (feat_cache[idx] == nullptr) {
|
||||
feat_cache[idx] = x;
|
||||
feat_idx += 1;
|
||||
} else {
|
||||
|
|
@ -373,7 +375,7 @@ namespace WAN {
|
|||
if (feat_cache.size() > 0) {
|
||||
int idx = feat_idx;
|
||||
auto cache_x = ggml_slice(ctx, x, 2, -CACHE_T, x->ne[2]);
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != NULL) {
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != nullptr) {
|
||||
// cache last frame of last two chunk
|
||||
cache_x = ggml_concat(ctx,
|
||||
ggml_slice(ctx, feat_cache[idx], 2, -1, feat_cache[idx]->ne[2]),
|
||||
|
|
@ -566,7 +568,7 @@ namespace WAN {
|
|||
|
||||
x = ggml_nn_attention(ctx, q, k, v, false); // [t, h * w, c]
|
||||
// v = ggml_cont(ctx, ggml_torch_permute(ctx, v, 1, 0, 2, 3)); // [t, h * w, c]
|
||||
// x = ggml_nn_attention_ext(ctx, q, k, v, q->ne[2], NULL, false, false, true);
|
||||
// x = ggml_nn_attention_ext(ctx, q, k, v, q->ne[2], nullptr, false, false, true);
|
||||
|
||||
x = ggml_nn_cont(ctx, ggml_permute(ctx, x, 1, 0, 2, 3)); // [t, c, h * w]
|
||||
x = ggml_reshape_4d(ctx, x, w, h, c, n); // [t, c, h, w]
|
||||
|
|
@ -672,7 +674,7 @@ namespace WAN {
|
|||
if (feat_cache.size() > 0) {
|
||||
int idx = feat_idx;
|
||||
auto cache_x = ggml_slice(ctx, x, 2, -CACHE_T, x->ne[2]);
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != NULL) {
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != nullptr) {
|
||||
// cache last frame of last two chunk
|
||||
cache_x = ggml_concat(ctx,
|
||||
ggml_slice(ctx, feat_cache[idx], 2, -1, feat_cache[idx]->ne[2]),
|
||||
|
|
@ -724,7 +726,7 @@ namespace WAN {
|
|||
if (feat_cache.size() > 0) {
|
||||
int idx = feat_idx;
|
||||
auto cache_x = ggml_slice(ctx, x, 2, -CACHE_T, x->ne[2]);
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != NULL) {
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != nullptr) {
|
||||
// cache last frame of last two chunk
|
||||
cache_x = ggml_concat(ctx,
|
||||
ggml_slice(ctx, feat_cache[idx], 2, -1, feat_cache[idx]->ne[2]),
|
||||
|
|
@ -843,7 +845,7 @@ namespace WAN {
|
|||
if (feat_cache.size() > 0) {
|
||||
int idx = feat_idx;
|
||||
auto cache_x = ggml_slice(ctx, x, 2, -CACHE_T, x->ne[2]);
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != NULL) {
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != nullptr) {
|
||||
// cache last frame of last two chunk
|
||||
cache_x = ggml_concat(ctx,
|
||||
ggml_slice(ctx, feat_cache[idx], 2, -1, feat_cache[idx]->ne[2]),
|
||||
|
|
@ -895,7 +897,7 @@ namespace WAN {
|
|||
if (feat_cache.size() > 0) {
|
||||
int idx = feat_idx;
|
||||
auto cache_x = ggml_slice(ctx, x, 2, -CACHE_T, x->ne[2]);
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != NULL) {
|
||||
if (cache_x->ne[2] < 2 && feat_cache[idx] != nullptr) {
|
||||
// cache last frame of last two chunk
|
||||
cache_x = ggml_concat(ctx,
|
||||
ggml_slice(ctx, feat_cache[idx], 2, -1, feat_cache[idx]->ne[2]),
|
||||
|
|
@ -935,9 +937,9 @@ namespace WAN {
|
|||
|
||||
void clear_cache() {
|
||||
_conv_idx = 0;
|
||||
_feat_map = std::vector<struct ggml_tensor*>(_conv_num, NULL);
|
||||
_feat_map = std::vector<struct ggml_tensor*>(_conv_num, nullptr);
|
||||
_enc_conv_idx = 0;
|
||||
_enc_feat_map = std::vector<struct ggml_tensor*>(_enc_conv_num, NULL);
|
||||
_enc_feat_map = std::vector<struct ggml_tensor*>(_enc_conv_num, nullptr);
|
||||
}
|
||||
|
||||
public:
|
||||
|
|
@ -1116,11 +1118,11 @@ namespace WAN {
|
|||
ae.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return "wan_vae";
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) override {
|
||||
ae.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
|
|
@ -1152,7 +1154,7 @@ namespace WAN {
|
|||
|
||||
for (int64_t feat_idx = 0; feat_idx < ae._feat_map.size(); feat_idx++) {
|
||||
ggml_tensor* feat_cache = ae._feat_map[feat_idx];
|
||||
if (feat_cache != NULL) {
|
||||
if (feat_cache != nullptr) {
|
||||
cache("feat_idx:" + std::to_string(feat_idx), feat_cache);
|
||||
ggml_build_forward_expand(gf, feat_cache);
|
||||
}
|
||||
|
|
@ -1167,7 +1169,7 @@ namespace WAN {
|
|||
struct ggml_tensor* z,
|
||||
bool decode_graph,
|
||||
struct ggml_tensor** output,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_context* output_ctx = nullptr) override {
|
||||
if (true) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(z, decode_graph);
|
||||
|
|
@ -1180,7 +1182,7 @@ namespace WAN {
|
|||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph_partial(z, decode_graph, i);
|
||||
};
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
GGMLRunner::compute(get_graph, n_threads, true, &out, output_ctx);
|
||||
ae.clear_cache();
|
||||
if (t == 1) {
|
||||
|
|
@ -1220,11 +1222,11 @@ namespace WAN {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
if (true) {
|
||||
// cpu f32, pass
|
||||
|
|
@ -1235,7 +1237,7 @@ namespace WAN {
|
|||
ggml_set_f32(z, 0.5f);
|
||||
z = load_tensor_from_file(work_ctx, "wan_vae_z.bin");
|
||||
print_ggml_tensor(z);
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
compute(8, z, true, &out, work_ctx);
|
||||
|
|
@ -1250,7 +1252,7 @@ namespace WAN {
|
|||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<WanVAERunner> vae = std::shared_ptr<WanVAERunner>(new WanVAERunner(backend, false, {}, "", false, VERSION_WAN2_2_TI2V));
|
||||
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, false, String2GGMLType{}, "", false, VERSION_WAN2_2_TI2V);
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
|
|
@ -1309,7 +1311,7 @@ namespace WAN {
|
|||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* mask = NULL) {
|
||||
struct ggml_tensor* mask = nullptr) {
|
||||
// x: [N, n_token, dim]
|
||||
// pe: [n_token, d_head/2, 2, 2]
|
||||
// return [N, n_token, dim]
|
||||
|
|
@ -1367,7 +1369,7 @@ namespace WAN {
|
|||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context,
|
||||
int64_t context_img_len) {
|
||||
int64_t context_img_len) override {
|
||||
// x: [N, n_token, dim]
|
||||
// context: [N, n_context, dim]
|
||||
// context_img_len: unused
|
||||
|
|
@ -1388,7 +1390,7 @@ namespace WAN {
|
|||
k = norm_k->forward(ctx, k);
|
||||
auto v = v_proj->forward(ctx, context); // [N, n_context, dim]
|
||||
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, num_heads, nullptr, false, false, flash_attn); // [N, n_token, dim]
|
||||
|
||||
x = o_proj->forward(ctx, x); // [N, n_token, dim]
|
||||
return x;
|
||||
|
|
@ -1417,7 +1419,7 @@ namespace WAN {
|
|||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* context,
|
||||
int64_t context_img_len) {
|
||||
int64_t context_img_len) override {
|
||||
// x: [N, n_token, dim]
|
||||
// context: [N, context_img_len + context_txt_len, dim]
|
||||
// return [N, n_token, dim]
|
||||
|
|
@ -1455,8 +1457,8 @@ namespace WAN {
|
|||
k_img = norm_k_img->forward(ctx, k_img);
|
||||
auto v_img = v_img_proj->forward(ctx, context_img); // [N, context_img_len, dim]
|
||||
|
||||
auto img_x = ggml_nn_attention_ext(ctx, backend, q, k_img, v_img, num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, num_heads, NULL, false, false, flash_attn); // [N, n_token, dim]
|
||||
auto img_x = ggml_nn_attention_ext(ctx, backend, q, k_img, v_img, num_heads, nullptr, false, false, flash_attn); // [N, n_token, dim]
|
||||
x = ggml_nn_attention_ext(ctx, backend, q, k, v, num_heads, nullptr, false, false, flash_attn); // [N, n_token, dim]
|
||||
|
||||
x = ggml_add(ctx, x, img_x);
|
||||
|
||||
|
|
@ -1497,7 +1499,7 @@ namespace WAN {
|
|||
protected:
|
||||
int dim;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
|
||||
params["modulation"] = ggml_new_tensor_3d(ctx, wtype, dim, 6, 1);
|
||||
}
|
||||
|
|
@ -1587,7 +1589,7 @@ namespace WAN {
|
|||
class VaceWanAttentionBlock : public WanAttentionBlock {
|
||||
protected:
|
||||
int block_id;
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
|
||||
params["modulation"] = ggml_new_tensor_3d(ctx, wtype, dim, 6, 1);
|
||||
}
|
||||
|
|
@ -1641,7 +1643,7 @@ namespace WAN {
|
|||
protected:
|
||||
int dim;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
|
||||
params["modulation"] = ggml_new_tensor_3d(ctx, wtype, dim, 2, 1);
|
||||
}
|
||||
|
|
@ -1688,7 +1690,7 @@ namespace WAN {
|
|||
int in_dim;
|
||||
int flf_pos_embed_token_number;
|
||||
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
|
||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
|
||||
if (flf_pos_embed_token_number > 0) {
|
||||
params["emb_pos"] = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, in_dim, flf_pos_embed_token_number, 1);
|
||||
}
|
||||
|
|
@ -1876,8 +1878,8 @@ namespace WAN {
|
|||
struct ggml_tensor* timestep,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* clip_fea = NULL,
|
||||
struct ggml_tensor* vace_context = NULL,
|
||||
struct ggml_tensor* clip_fea = nullptr,
|
||||
struct ggml_tensor* vace_context = nullptr,
|
||||
float vace_strength = 1.f,
|
||||
int64_t N = 1) {
|
||||
// x: [N*C, T, H, W], C => in_dim
|
||||
|
|
@ -1920,7 +1922,7 @@ namespace WAN {
|
|||
context = text_embedding_2->forward(ctx, context); // [N, context_txt_len, dim]
|
||||
|
||||
int64_t context_img_len = 0;
|
||||
if (clip_fea != NULL) {
|
||||
if (clip_fea != nullptr) {
|
||||
if (params.model_type == "i2v") {
|
||||
auto img_emb = std::dynamic_pointer_cast<MLPProj>(blocks["img_emb"]);
|
||||
auto context_img = img_emb->forward(ctx, clip_fea); // [N, context_img_len, dim]
|
||||
|
|
@ -1930,7 +1932,7 @@ namespace WAN {
|
|||
}
|
||||
|
||||
// vace_patch_embedding
|
||||
ggml_tensor* c = NULL;
|
||||
ggml_tensor* c = nullptr;
|
||||
if (params.vace_layers > 0) {
|
||||
auto vace_patch_embedding = std::dynamic_pointer_cast<Conv3d>(blocks["vace_patch_embedding"]);
|
||||
|
||||
|
|
@ -1971,9 +1973,9 @@ namespace WAN {
|
|||
struct ggml_tensor* timestep,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* clip_fea = NULL,
|
||||
struct ggml_tensor* time_dim_concat = NULL,
|
||||
struct ggml_tensor* vace_context = NULL,
|
||||
struct ggml_tensor* clip_fea = nullptr,
|
||||
struct ggml_tensor* time_dim_concat = nullptr,
|
||||
struct ggml_tensor* vace_context = nullptr,
|
||||
float vace_strength = 1.f,
|
||||
int64_t N = 1) {
|
||||
// Forward pass of DiT.
|
||||
|
|
@ -1997,7 +1999,7 @@ namespace WAN {
|
|||
int64_t h_len = ((H + (std::get<1>(params.patch_size) / 2)) / std::get<1>(params.patch_size));
|
||||
int64_t w_len = ((W + (std::get<2>(params.patch_size) / 2)) / std::get<2>(params.patch_size));
|
||||
|
||||
if (time_dim_concat != NULL) {
|
||||
if (time_dim_concat != nullptr) {
|
||||
time_dim_concat = pad_to_patch_size(ctx, time_dim_concat);
|
||||
x = ggml_concat(ctx, x, time_dim_concat, 2); // [N*C, (T+pad_t) + (T2+pad_t2), H + pad_h, W + pad_w]
|
||||
t_len = ((x->ne[2] + (std::get<0>(params.patch_size) / 2)) / std::get<0>(params.patch_size));
|
||||
|
|
@ -2134,7 +2136,7 @@ namespace WAN {
|
|||
wan.init(params_ctx, tensor_types, prefix);
|
||||
}
|
||||
|
||||
std::string get_desc() {
|
||||
std::string get_desc() override {
|
||||
return desc;
|
||||
}
|
||||
|
||||
|
|
@ -2145,10 +2147,10 @@ namespace WAN {
|
|||
struct ggml_cgraph* build_graph(struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* clip_fea = NULL,
|
||||
struct ggml_tensor* c_concat = NULL,
|
||||
struct ggml_tensor* time_dim_concat = NULL,
|
||||
struct ggml_tensor* vace_context = NULL,
|
||||
struct ggml_tensor* clip_fea = nullptr,
|
||||
struct ggml_tensor* c_concat = nullptr,
|
||||
struct ggml_tensor* time_dim_concat = nullptr,
|
||||
struct ggml_tensor* vace_context = nullptr,
|
||||
float vace_strength = 1.f) {
|
||||
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, WAN_GRAPH_SIZE, false);
|
||||
|
||||
|
|
@ -2174,10 +2176,10 @@ namespace WAN {
|
|||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, wan_params.axes_dim_sum / 2, pos_len);
|
||||
// pe->data = pe_vec.data();
|
||||
// print_ggml_tensor(pe);
|
||||
// pe->data = NULL;
|
||||
// pe->data = nullptr;
|
||||
set_backend_tensor_data(pe, pe_vec.data());
|
||||
|
||||
if (c_concat != NULL) {
|
||||
if (c_concat != nullptr) {
|
||||
x = ggml_concat(compute_ctx, x, c_concat, 3);
|
||||
}
|
||||
|
||||
|
|
@ -2201,13 +2203,13 @@ namespace WAN {
|
|||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* timesteps,
|
||||
struct ggml_tensor* context,
|
||||
struct ggml_tensor* clip_fea = NULL,
|
||||
struct ggml_tensor* c_concat = NULL,
|
||||
struct ggml_tensor* time_dim_concat = NULL,
|
||||
struct ggml_tensor* vace_context = NULL,
|
||||
struct ggml_tensor* clip_fea = nullptr,
|
||||
struct ggml_tensor* c_concat = nullptr,
|
||||
struct ggml_tensor* time_dim_concat = nullptr,
|
||||
struct ggml_tensor* vace_context = nullptr,
|
||||
float vace_strength = 1.f,
|
||||
struct ggml_tensor** output = NULL,
|
||||
struct ggml_context* output_ctx = NULL) {
|
||||
struct ggml_tensor** output = nullptr,
|
||||
struct ggml_context* output_ctx = nullptr) {
|
||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||
return build_graph(x, timesteps, context, clip_fea, c_concat, time_dim_concat, vace_context, vace_strength);
|
||||
};
|
||||
|
|
@ -2218,11 +2220,11 @@ namespace WAN {
|
|||
void test() {
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(200 * 1024 * 1024); // 200 MB
|
||||
params.mem_buffer = NULL;
|
||||
params.mem_buffer = nullptr;
|
||||
params.no_alloc = false;
|
||||
|
||||
struct ggml_context* work_ctx = ggml_init(params);
|
||||
GGML_ASSERT(work_ctx != NULL);
|
||||
GGML_ASSERT(work_ctx != nullptr);
|
||||
|
||||
{
|
||||
// cpu f16: pass
|
||||
|
|
@ -2244,10 +2246,10 @@ namespace WAN {
|
|||
// auto clip_fea = load_tensor_from_file(work_ctx, "wan_dit_clip_fea.bin");
|
||||
// print_ggml_tensor(clip_fea);
|
||||
|
||||
struct ggml_tensor* out = NULL;
|
||||
struct ggml_tensor* out = nullptr;
|
||||
|
||||
int t0 = ggml_time_ms();
|
||||
compute(8, x, timesteps, context, NULL, NULL, NULL, NULL, 1.f, &out, work_ctx);
|
||||
compute(8, x, timesteps, context, nullptr, nullptr, nullptr, nullptr, 1.f, &out, work_ctx);
|
||||
int t1 = ggml_time_ms();
|
||||
|
||||
print_ggml_tensor(out);
|
||||
|
|
@ -2275,12 +2277,12 @@ namespace WAN {
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<WanRunner> wan = std::shared_ptr<WanRunner>(new WanRunner(backend,
|
||||
false,
|
||||
tensor_types,
|
||||
"model.diffusion_model",
|
||||
VERSION_WAN2_2_TI2V,
|
||||
true));
|
||||
std::shared_ptr<WanRunner> wan = std::make_shared<WanRunner>(backend,
|
||||
false,
|
||||
tensor_types,
|
||||
"model.diffusion_model",
|
||||
VERSION_WAN2_2_TI2V,
|
||||
true);
|
||||
|
||||
wan->alloc_params_buffer();
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue