diff --git a/otherarch/sdcpp/conditioner.hpp b/otherarch/sdcpp/conditioner.hpp
index 59538f683..4005fadf7 100644
--- a/otherarch/sdcpp/conditioner.hpp
+++ b/otherarch/sdcpp/conditioner.hpp
@@ -597,7 +597,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
         GGML_ASSERT(it != tokens.end()); // prompt must have trigger word
         tokens.erase(it);
         return decode(tokens);
-        //return prompt; //kcpp we don't care about photomaker trigger words
     }
 
     SDCondition get_learned_condition(ggml_context* work_ctx,
@@ -903,6 +902,7 @@ struct SD3CLIPEmbedder : public Conditioner {
 
                 t5->compute(n_threads,
                             input_ids,
+                            NULL,
                             &chunk_hidden_states_t5,
                             work_ctx);
                 {
@@ -1148,6 +1148,7 @@ struct FluxCLIPEmbedder : public Conditioner {
 
             t5->compute(n_threads,
                         input_ids,
+                        NULL,
                         &chunk_hidden_states,
                         work_ctx);
             {
@@ -1223,10 +1224,15 @@ struct PixArtCLIPEmbedder : public Conditioner {
     T5UniGramTokenizer t5_tokenizer;
     std::shared_ptr<T5Runner> t5;
     size_t chunk_len = 512;
+    bool use_mask    = false;
+    int mask_pad     = 1;
 
     PixArtCLIPEmbedder(ggml_backend_t backend,
                        std::map<std::string, enum ggml_type>& tensor_types,
-                       int clip_skip = -1) {
+                       int clip_skip = -1,
+                       bool use_mask = false,
+                       int mask_pad  = 1)
+        : use_mask(use_mask), mask_pad(mask_pad) {
         t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
     }
 
@@ -1323,16 +1329,6 @@ struct PixArtCLIPEmbedder : public Conditioner {
 
         size_t chunk_count = t5_tokens.size() / chunk_len;
 
-        bool use_mask = false;
-        const char* SD_CHROMA_USE_T5_MASK = getenv("SD_CHROMA_USE_T5_MASK");
-        if (SD_CHROMA_USE_T5_MASK != nullptr) {
-            std::string sd_chroma_use_t5_mask_str = SD_CHROMA_USE_T5_MASK;
-            if (sd_chroma_use_t5_mask_str == "ON" || sd_chroma_use_t5_mask_str == "TRUE") {
-                use_mask = true;
-            } else if (sd_chroma_use_t5_mask_str != "OFF" && sd_chroma_use_t5_mask_str != "FALSE") {
-                LOG_WARN("SD_CHROMA_USE_T5_MASK environment variable has unexpected value. Assuming default (\"OFF\"). (Expected \"OFF\"/\"FALSE\" or\"ON\"/\"TRUE\", got \"%s\")", SD_CHROMA_USE_T5_MASK);
-            }
-        }
         for (int chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
             // t5
             std::vector<int> chunk_tokens(t5_tokens.begin() + chunk_idx * chunk_len,
@@ -1347,9 +1343,9 @@ struct PixArtCLIPEmbedder : public Conditioner {
 
             t5->compute(n_threads,
                         input_ids,
+                        t5_attn_mask_chunk,
                         &chunk_hidden_states,
-                        work_ctx,
-                        t5_attn_mask_chunk);
+                        work_ctx);
             {
                 auto tensor = chunk_hidden_states;
                 float original_mean = ggml_tensor_mean(tensor);
@@ -1391,18 +1387,6 @@ struct PixArtCLIPEmbedder : public Conditioner {
             ggml_set_f32(hidden_states, 0.f);
         }
 
-        int mask_pad = 1;
-        const char* SD_CHROMA_MASK_PAD_OVERRIDE = getenv("SD_CHROMA_MASK_PAD_OVERRIDE");
-        if (SD_CHROMA_MASK_PAD_OVERRIDE != nullptr) {
-            std::string mask_pad_str = SD_CHROMA_MASK_PAD_OVERRIDE;
-            try {
-                mask_pad = std::stoi(mask_pad_str);
-            } catch (const std::invalid_argument&) {
-                LOG_WARN("SD_CHROMA_MASK_PAD_OVERRIDE environment variable is not a valid integer (%s). Falling back to default (%d)", SD_CHROMA_MASK_PAD_OVERRIDE, mask_pad);
-            } catch (const std::out_of_range&) {
-                LOG_WARN("SD_CHROMA_MASK_PAD_OVERRIDE environment variable value is out of range for `int` type (%s). Falling back to default (%d)", SD_CHROMA_MASK_PAD_OVERRIDE, mask_pad);
-            }
-        }
         modify_mask_to_attend_padding(t5_attn_mask, ggml_nelements(t5_attn_mask), mask_pad);
 
         return SDCondition(hidden_states, t5_attn_mask, NULL);
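For context on the `mask_pad` value that replaces `SD_CHROMA_MASK_PAD_OVERRIDE`: `modify_mask_to_attend_padding` (defined elsewhere in the tree, not part of this diff) re-enables attention to the first few padding positions of the T5 mask. A minimal sketch of that behavior, assuming the usual convention of `0.0f` for visible positions and `-INFINITY` for masked ones; the real helper may differ:

```cpp
#include <cmath>
#include "ggml.h"

// Sketch only: make the first `mask_pad` padding positions attendable again.
static void modify_mask_to_attend_padding(struct ggml_tensor* mask, int64_t n, int mask_pad) {
    float* data  = (float*)mask->data;
    int unmasked = 0;
    for (int64_t i = 0; i < n && unmasked < mask_pad; i++) {
        if (std::isinf(data[i]) && data[i] < 0.0f) {  // a padding position
            data[i] = 0.0f;                           // make it visible again
            unmasked++;
        }
    }
}
```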
diff --git a/otherarch/sdcpp/diffusion_model.hpp b/otherarch/sdcpp/diffusion_model.hpp
index 48522b25f..65680b8d9 100644
--- a/otherarch/sdcpp/diffusion_model.hpp
+++ b/otherarch/sdcpp/diffusion_model.hpp
@@ -137,8 +137,9 @@ struct FluxModel : public DiffusionModel {
     FluxModel(ggml_backend_t backend,
               std::map<std::string, enum ggml_type>& tensor_types,
               SDVersion version = VERSION_FLUX,
-              bool flash_attn   = false)
-        : flux(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
+              bool flash_attn   = false,
+              bool use_mask     = false)
+        : flux(backend, tensor_types, "model.diffusion_model", version, flash_attn, use_mask) {
     }
 
     void alloc_params_buffer() {
diff --git a/otherarch/sdcpp/flux.hpp b/otherarch/sdcpp/flux.hpp
index e6d941af9..a16125102 100644
--- a/otherarch/sdcpp/flux.hpp
+++ b/otherarch/sdcpp/flux.hpp
@@ -744,10 +744,10 @@ namespace Flux {
         return ids;
     }
 
+    // Generate positional embeddings
     std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
         std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
-        std::vector<std::vector<float>> trans_ids = transpose(ids);
         size_t pos_len = ids.size();
         int num_axes   = axes_dim.size();
 
@@ -872,7 +872,7 @@ namespace Flux {
                                         struct ggml_tensor* y,
                                         struct ggml_tensor* guidance,
                                         struct ggml_tensor* pe,
-                                        struct ggml_tensor* arange = NULL,
+                                        struct ggml_tensor* mod_index_arange = NULL,
                                         std::vector<int> skip_layers = {}) {
             auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
             auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
@@ -887,9 +887,10 @@ namespace Flux {
             auto distill_timestep = ggml_nn_timestep_embedding(ctx, timesteps, 16, 10000, 1000.f);
             auto distill_guidance = ggml_nn_timestep_embedding(ctx, guidance, 16, 10000, 1000.f);
 
-            // auto arange = ggml_arange(ctx, 0, (float)mod_index_length, 1); // Not working on a lot of backends, precomputing it on CPU instead
-            GGML_ASSERT(arange != NULL);
-            auto modulation_index = ggml_nn_timestep_embedding(ctx, arange, 32, 10000, 1000.f); // [1, 344, 32]
+            // auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
+            // ggml_arange not working on a lot of backends, precomputing it on CPU instead
+            GGML_ASSERT(mod_index_arange != NULL);
+            auto modulation_index = ggml_nn_timestep_embedding(ctx, mod_index_arange, 32, 10000, 1000.f); // [1, 344, 32]
 
             // Batch broadcast (will it ever be useful)
             modulation_index = ggml_repeat(ctx, modulation_index, ggml_new_tensor_3d(ctx, GGML_TYPE_F32, modulation_index->ne[0], modulation_index->ne[1], img->ne[2])); // [N, 344, 32]
@@ -982,7 +983,7 @@ namespace Flux {
                                     struct ggml_tensor* y,
                                     struct ggml_tensor* guidance,
                                     struct ggml_tensor* pe,
-                                    struct ggml_tensor* arange = NULL,
+                                    struct ggml_tensor* mod_index_arange = NULL,
                                     std::vector<ggml_tensor*> ref_latents = {},
                                     std::vector<int> skip_layers = {}) {
             // Forward pass of DiT.
@@ -1024,7 +1025,7 @@ namespace Flux {
                 }
             }
 
-            auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
+            auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
             if (out->ne[1] > img_tokens) {
                 out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
                 out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
@@ -1044,15 +1045,18 @@ namespace Flux {
     public:
         FluxParams flux_params;
         Flux flux;
-        std::vector<float> pe_vec, range; // for cache
+        std::vector<float> pe_vec;
+        std::vector<float> mod_index_arange_vec; // for cache
         SDVersion version;
+        bool use_mask = false;
 
         FluxRunner(ggml_backend_t backend,
                    std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
                    const std::string prefix = "",
                    SDVersion version        = VERSION_FLUX,
-                   bool flash_attn          = false)
-            : GGMLRunner(backend) {
+                   bool flash_attn          = false,
+                   bool use_mask            = false)
+            : GGMLRunner(backend), use_mask(use_mask) {
             flux_params.flash_attn     = flash_attn;
             flux_params.guidance_embed = false;
             flux_params.depth          = 0;
@@ -1116,51 +1120,28 @@ namespace Flux {
                                         struct ggml_tensor* y,
                                         struct ggml_tensor* guidance,
                                         std::vector<ggml_tensor*> ref_latents = {},
-                                        std::vector<int> skip_layers = std::vector<int>()) {
+                                        std::vector<int> skip_layers = {}) {
             GGML_ASSERT(x->ne[3] == 1);
             struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
 
-            struct ggml_tensor* precompute_arange = NULL;
+            struct ggml_tensor* mod_index_arange = NULL;
 
             x       = to_backend(x);
             context = to_backend(context);
             if (c_concat != NULL) {
                 c_concat = to_backend(c_concat);
             }
             if (flux_params.is_chroma) {
-                const char* SD_CHROMA_ENABLE_GUIDANCE = getenv("SD_CHROMA_ENABLE_GUIDANCE");
-                bool disable_guidance = true;
-                if (SD_CHROMA_ENABLE_GUIDANCE != NULL) {
-                    std::string enable_guidance_str = SD_CHROMA_ENABLE_GUIDANCE;
-                    if (enable_guidance_str == "ON" || enable_guidance_str == "TRUE") {
-                        LOG_WARN("Chroma guidance has been enabled. Image might be broken. (SD_CHROMA_ENABLE_GUIDANCE env variable to \"OFF\" to disable)", SD_CHROMA_ENABLE_GUIDANCE);
-                        disable_guidance = false;
-                    } else if (enable_guidance_str != "OFF" && enable_guidance_str != "FALSE") {
-                        LOG_WARN("SD_CHROMA_ENABLE_GUIDANCE environment variable has unexpected value. Assuming default (\"OFF\"). (Expected \"ON\"/\"TRUE\" or\"OFF\"/\"FALSE\", got \"%s\")", SD_CHROMA_ENABLE_GUIDANCE);
-                    }
-                }
-                if (disable_guidance) {
-                    // LOG_DEBUG("Forcing guidance to 0 for chroma model (SD_CHROMA_ENABLE_GUIDANCE env variable to \"ON\" to enable)");
-                    guidance = ggml_set_f32(guidance, 0);
+                guidance = ggml_set_f32(guidance, 0);
+
+                if (!use_mask) {
+                    y = NULL;
                 }
 
-                const char* SD_CHROMA_USE_DIT_MASK = getenv("SD_CHROMA_USE_DIT_MASK");
-                if (SD_CHROMA_USE_DIT_MASK != nullptr) {
-                    std::string sd_chroma_use_DiT_mask_str = SD_CHROMA_USE_DIT_MASK;
-                    if (sd_chroma_use_DiT_mask_str == "OFF" || sd_chroma_use_DiT_mask_str == "FALSE") {
-                        y = NULL;
-                    } else if (sd_chroma_use_DiT_mask_str != "ON" && sd_chroma_use_DiT_mask_str != "TRUE") {
-                        LOG_WARN("SD_CHROMA_USE_DIT_MASK environment variable has unexpected value. Assuming default (\"ON\"). (Expected \"ON\"/\"TRUE\" or\"OFF\"/\"FALSE\", got \"%s\")", SD_CHROMA_USE_DIT_MASK);
-                    }
-                }
-
-                // ggml_arrange is not working on some backends, and y isn't used, so let's reuse y to precompute it
-                range             = arange(0, 344);
-                precompute_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, range.size());
-                set_backend_tensor_data(precompute_arange, range.data());
-                // y = NULL;
+                // ggml_arange is not working on some backends, precompute it
+                mod_index_arange_vec = arange(0, 344);
+                mod_index_arange     = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
+                set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data());
             }
             y = to_backend(y);
@@ -1189,7 +1170,7 @@ namespace Flux {
                                            y,
                                            guidance,
                                            pe,
-                                           precompute_arange,
+                                           mod_index_arange,
                                            ref_latents,
                                            skip_layers);
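The renamed `mod_index_arange` path keeps the existing workaround: since `ggml_arange` reportedly does not work on many backends, the ramp is built on the host and bound to a graph input via `set_backend_tensor_data`. `mod_index_arange_vec` is a member of `FluxRunner` precisely so the host buffer outlives graph construction and evaluation. The `arange` helper is presumably along these lines (a sketch; the repo's actual ggml helper may differ):

```cpp
#include <vector>

// Host-side stand-in for ggml_arange: values in [start, stop) with the given step.
std::vector<float> arange(float start, float stop, float step = 1.0f) {
    std::vector<float> v;
    for (float x = start; x < stop; x += step) {
        v.push_back(x);  // e.g. arange(0, 344) -> 0, 1, ..., 343
    }
    return v;
}
```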
(Expected \"ON\"/\"TRUE\" or\"OFF\"/\"FALSE\", got \"%s\")", SD_CHROMA_USE_DIT_MASK); - } - } - - // ggml_arrange is not working on some backends, and y isn't used, so let's reuse y to precompute it - range = arange(0, 344); - precompute_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, range.size()); - set_backend_tensor_data(precompute_arange, range.data()); - // y = NULL; + // ggml_arange is not working on some backends, precompute it + mod_index_arange_vec = arange(0, 344); + mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size()); + set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data()); } y = to_backend(y); @@ -1189,7 +1170,7 @@ namespace Flux { y, guidance, pe, - precompute_arange, + mod_index_arange, ref_latents, skip_layers); diff --git a/otherarch/sdcpp/main.cpp b/otherarch/sdcpp/main.cpp index 9499e0c34..55e9591d3 100644 --- a/otherarch/sdcpp/main.cpp +++ b/otherarch/sdcpp/main.cpp @@ -128,6 +128,10 @@ struct SDParams { float slg_scale = 0.f; float skip_layer_start = 0.01f; float skip_layer_end = 0.2f; + + bool chroma_use_dit_mask = true; + bool chroma_use_t5_mask = false; + int chroma_t5_mask_pad = 1; }; void print_params(SDParams params) { @@ -177,6 +181,9 @@ void print_params(SDParams params) { printf(" batch_count: %d\n", params.batch_count); printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false"); printf(" upscale_repeats: %d\n", params.upscale_repeats); + printf(" chroma_use_dit_mask: %s\n", params.chroma_use_dit_mask ? "true" : "false"); + printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false"); + printf(" chroma_t5_mask_pad: %d\n", params.chroma_t5_mask_pad); } void print_usage(int argc, const char* argv[]) { @@ -243,6 +250,9 @@ void print_usage(int argc, const char* argv[]) { printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n"); printf(" --canny apply canny preprocessor (edge detection)\n"); printf(" --color Colors the logging tags according to level\n"); + printf(" --chroma-disable-dit-mask disable dit mask for chroma\n"); + printf(" --chroma-enable-t5-mask enable t5 mask for chroma\n"); + printf(" --chroma-t5-mask-pad PAD_SIZE t5 mask pad size of chroma\n"); printf(" -v, --verbose print extra info\n"); printf(" -ki, --kontext_img [PATH] Reference image for Flux Kontext models (can be used multiple times) \n"); } @@ -938,7 +948,10 @@ int main(int argc, const char* argv[]) { params.clip_on_cpu, params.control_net_cpu, params.vae_on_cpu, - params.diffusion_flash_attn); + params.diffusion_flash_attn, + params.chroma_use_dit_mask, + params.chroma_use_t5_mask, + params.chroma_t5_mask_pad); if (sd_ctx == NULL) { printf("new_sd_ctx_t failed\n"); diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 7711b4765..6e111e74c 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -104,6 +104,10 @@ struct SDParams { float slg_scale = 0.f; float skip_layer_start = 0.01f; float skip_layer_end = 0.2f; + + bool chroma_use_dit_mask = true; + bool chroma_use_t5_mask = false; + int chroma_t5_mask_pad = 1; }; //shared @@ -272,7 +276,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { sd_params->clip_on_cpu, sd_params->control_net_cpu, sd_params->vae_on_cpu, - sd_params->diffusion_flash_attn); + sd_params->diffusion_flash_attn, + sd_params->chroma_use_dit_mask, + sd_params->chroma_use_t5_mask, + sd_params->chroma_t5_mask_pad); if (sd_ctx == NULL) { printf("\nError: KCPP SD Failed to create 
diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
index 7711b4765..6e111e74c 100644
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -104,6 +104,10 @@ struct SDParams {
     float slg_scale        = 0.f;
     float skip_layer_start = 0.01f;
     float skip_layer_end   = 0.2f;
+
+    bool chroma_use_dit_mask = true;
+    bool chroma_use_t5_mask  = false;
+    int chroma_t5_mask_pad   = 1;
 };
 
 //shared
@@ -272,7 +276,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
         sd_params->clip_on_cpu,
         sd_params->control_net_cpu,
         sd_params->vae_on_cpu,
-        sd_params->diffusion_flash_attn);
+        sd_params->diffusion_flash_attn,
+        sd_params->chroma_use_dit_mask,
+        sd_params->chroma_use_t5_mask,
+        sd_params->chroma_t5_mask_pad);
 
     if (sd_ctx == NULL) {
         printf("\nError: KCPP SD Failed to create context!\nIf using Flux/SD3.5, make sure you have ALL files required (e.g. VAE, T5, Clip...) or baked in!\n");
diff --git a/otherarch/sdcpp/stable-diffusion.cpp b/otherarch/sdcpp/stable-diffusion.cpp
index 5e0a79d01..322e888bd 100644
--- a/otherarch/sdcpp/stable-diffusion.cpp
+++ b/otherarch/sdcpp/stable-diffusion.cpp
@@ -159,7 +159,10 @@ public:
               bool clip_on_cpu,
               bool control_net_cpu,
               bool vae_on_cpu,
-              bool diffusion_flash_attn) {
+              bool diffusion_flash_attn,
+              bool chroma_use_dit_mask,
+              bool chroma_use_t5_mask,
+              int chroma_t5_mask_pad) {
         use_tiny_autoencoder = taesd_path.size() > 0;
         std::string taesd_path_fixed = taesd_path;
         is_loaded_chroma = false;
@@ -391,11 +394,11 @@ public:
                 }
             }
             if (is_chroma) {
-                cond_stage_model = std::make_shared<PixArtCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
+                cond_stage_model = std::make_shared<PixArtCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types, -1, chroma_use_t5_mask, chroma_t5_mask_pad);
             } else {
                 cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
             }
-            diffusion_model = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, version, diffusion_flash_attn);
+            diffusion_model = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, version, diffusion_flash_attn, chroma_use_dit_mask);
         } else {
             if (id_embeddings_path.find("v2") != std::string::npos) {
                 cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, model_loader.tensor_storages_types, embeddings_path, version, PM_VERSION_2);
@@ -1337,7 +1340,10 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
                      bool keep_clip_on_cpu,
                      bool keep_control_net_cpu,
                      bool keep_vae_on_cpu,
-                     bool diffusion_flash_attn) {
+                     bool diffusion_flash_attn,
+                     bool chroma_use_dit_mask,
+                     bool chroma_use_t5_mask,
+                     int chroma_t5_mask_pad) {
     sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
     if (sd_ctx == NULL) {
         return NULL;
@@ -1379,7 +1385,10 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
                            keep_clip_on_cpu,
                            keep_control_net_cpu,
                            keep_vae_on_cpu,
-                           diffusion_flash_attn)) {
+                           diffusion_flash_attn,
+                           chroma_use_dit_mask,
+                           chroma_use_t5_mask,
+                           chroma_t5_mask_pad)) {
         delete sd_ctx->sd;
         sd_ctx->sd = NULL;
         free(sd_ctx);
@@ -2231,5 +2240,133 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
 
     LOG_INFO("img2vid completed in %.2fs", (t3 - t0) * 1.0f / 1000);
 
+    return result_images;
+}
+
+sd_image_t* edit(sd_ctx_t* sd_ctx,
+                 sd_image_t* ref_images,
+                 int ref_images_count,
+                 const char* prompt_c_str,
+                 const char* negative_prompt_c_str,
+                 int clip_skip,
+                 float cfg_scale,
+                 float guidance,
+                 float eta,
+                 int width,
+                 int height,
+                 sample_method_t sample_method,
+                 int sample_steps,
+                 float strength,
+                 int64_t seed,
+                 int batch_count,
+                 const sd_image_t* control_cond,
+                 float control_strength,
+                 float style_ratio,
+                 bool normalize_input,
+                 int* skip_layers         = NULL,
+                 size_t skip_layers_count = 0,
+                 float slg_scale          = 0,
+                 float skip_layer_start   = 0.01,
+                 float skip_layer_end     = 0.2) {
+    std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
+    LOG_DEBUG("edit %dx%d", width, height);
+    if (sd_ctx == NULL) {
+        return NULL;
+    }
+    if (ref_images_count <= 0) {
+        LOG_ERROR("ref images count should be > 0");
+        return NULL;
+    }
+
+    struct ggml_init_params params;
+    params.mem_size = static_cast<size_t>(30 * 1024 * 1024); // 30 MB
+    params.mem_size += width * height * 3 * sizeof(float) * 3 * ref_images_count;
+    params.mem_size *= batch_count;
+    params.mem_buffer = NULL;
+    params.no_alloc   = false;
+    // LOG_DEBUG("mem_size %u ", params.mem_size);
+
+    struct ggml_context* work_ctx = ggml_init(params);
+    if (!work_ctx) {
+        LOG_ERROR("ggml_init() failed");
+        return NULL;
+    }
+
+    if (seed < 0) {
+        srand((int)time(NULL));
+        seed = rand();
+    }
+    sd_ctx->sd->rng->manual_seed(seed);
+
+    int C = 4;
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        C = 16;
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        C = 16;
+    }
+    int W = width / 8;
+    int H = height / 8;
+    ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.0609f);
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.1159f);
+    } else {
+        ggml_set_f32(init_latent, 0.f);
+    }
+
+    size_t t0 = ggml_time_ms();
+
+    std::vector<struct ggml_tensor*> ref_latents;
+    for (int i = 0; i < ref_images_count; i++) {
+        ggml_tensor* img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, ref_images[i].width, ref_images[i].height, 3, 1);
+        sd_image_to_tensor(ref_images[i].data, img);
+
+        ggml_tensor* latent = NULL;
+        if (!sd_ctx->sd->use_tiny_autoencoder) {
+            ggml_tensor* moments = sd_ctx->sd->encode_first_stage(work_ctx, img);
+            latent               = sd_ctx->sd->get_first_stage_encoding(work_ctx, moments);
+        } else {
+            latent = sd_ctx->sd->encode_first_stage(work_ctx, img);
+        }
+        ref_latents.push_back(latent);
+    }
+
+    size_t t1 = ggml_time_ms();
+    LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
+
+    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
+
+    sd_image_t* result_images = generate_image(sd_ctx,
+                                               work_ctx,
+                                               init_latent,
+                                               prompt_c_str,
+                                               negative_prompt_c_str,
+                                               clip_skip,
+                                               cfg_scale,
+                                               guidance,
+                                               eta,
+                                               width,
+                                               height,
+                                               sample_method,
+                                               sigmas,
+                                               seed,
+                                               batch_count,
+                                               control_cond,
+                                               control_strength,
+                                               style_ratio,
+                                               normalize_input,
+                                               "",
+                                               ref_latents,
+                                               skip_layers_vec,
+                                               slg_scale,
+                                               skip_layer_start,
+                                               skip_layer_end,
+                                               NULL);
+
+    size_t t2 = ggml_time_ms();
+
+    LOG_INFO("edit completed in %.2fs", (t2 - t0) * 1.0f / 1000);
+
     return result_images;
 }
\ No newline at end of file
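For reference, a minimal caller of the new `edit()` entry point, matching the declaration added to stable-diffusion.h just below. All literal values are illustrative only, and `load_reference_image()` is a hypothetical stand-in for the caller's own image loading:

```cpp
// Sketch of driving the new edit() API (Flux Kontext style reference editing).
sd_image_t ref  = load_reference_image();      // hypothetical helper
sd_image_t* out = edit(sd_ctx,
                       &ref, 1,                // ref_images, ref_images_count
                       "make the sky stormy",  // prompt
                       "",                     // negative prompt
                       -1,                     // clip_skip (default)
                       1.0f, 3.5f, 0.f,        // cfg_scale, guidance, eta
                       1024, 1024,             // width, height
                       EULER, 20,              // sample_method, sample_steps
                       0.75f, 42, 1,           // strength, seed, batch_count
                       NULL, 0.9f,             // control_cond, control_strength
                       20.f, false,            // style_ratio, normalize_input
                       NULL, 0,                // skip_layers, skip_layers_count
                       0.f, 0.01f, 0.2f);      // slg_scale, skip_layer_start/end
```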
diff --git a/otherarch/sdcpp/stable-diffusion.h b/otherarch/sdcpp/stable-diffusion.h
index 0b0729717..fc0cd75e9 100644
--- a/otherarch/sdcpp/stable-diffusion.h
+++ b/otherarch/sdcpp/stable-diffusion.h
@@ -154,7 +154,10 @@ SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
                             bool keep_clip_on_cpu,
                             bool keep_control_net_cpu,
                             bool keep_vae_on_cpu,
-                            bool diffusion_flash_attn);
+                            bool diffusion_flash_attn,
+                            bool chroma_use_dit_mask,
+                            bool chroma_use_t5_mask,
+                            int chroma_t5_mask_pad);
 
 SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
 
@@ -230,6 +233,32 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
                            float strength,
                            int64_t seed);
 
+SD_API sd_image_t* edit(sd_ctx_t* sd_ctx,
+                        sd_image_t* ref_images,
+                        int ref_images_count,
+                        const char* prompt,
+                        const char* negative_prompt,
+                        int clip_skip,
+                        float cfg_scale,
+                        float guidance,
+                        float eta,
+                        int width,
+                        int height,
+                        enum sample_method_t sample_method,
+                        int sample_steps,
+                        float strength,
+                        int64_t seed,
+                        int batch_count,
+                        const sd_image_t* control_cond,
+                        float control_strength,
+                        float style_strength,
+                        bool normalize_input,
+                        int* skip_layers,
+                        size_t skip_layers_count,
+                        float slg_scale,
+                        float skip_layer_start,
+                        float skip_layer_end);
+
 typedef struct upscaler_ctx_t upscaler_ctx_t;
 
 SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
diff --git a/otherarch/sdcpp/t5.hpp b/otherarch/sdcpp/t5.hpp
index 4cbde3eda..1861ad478 100644
--- a/otherarch/sdcpp/t5.hpp
+++ b/otherarch/sdcpp/t5.hpp
@@ -795,9 +795,9 @@ struct T5Runner : public GGMLRunner {
 
     void compute(const int n_threads,
                  struct ggml_tensor* input_ids,
+                 struct ggml_tensor* attention_mask,
                  ggml_tensor** output,
-                 ggml_context* output_ctx = NULL,
-                 struct ggml_tensor* attention_mask = NULL) {
+                 ggml_context* output_ctx = NULL) {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(input_ids, attention_mask);
         };
@@ -966,7 +966,7 @@ struct T5Embedder {
         struct ggml_tensor* out = NULL;
 
         int t0 = ggml_time_ms();
-        model.compute(8, input_ids, &out, work_ctx);
+        model.compute(8, input_ids, NULL, &out, work_ctx);
         int t1 = ggml_time_ms();
 
         print_ggml_tensor(out);