From b2719268df4f9dff0883e321ece4f79c8edc8d51 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 17 Apr 2025 00:52:49 +0800
Subject: [PATCH] merge sdcpp fixes

---
 otherarch/sdcpp/conditioner.hpp |  8 ++---
 otherarch/sdcpp/model.cpp       | 58 +++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/otherarch/sdcpp/conditioner.hpp b/otherarch/sdcpp/conditioner.hpp
index 6e9acdb19..d28d5661b 100644
--- a/otherarch/sdcpp/conditioner.hpp
+++ b/otherarch/sdcpp/conditioner.hpp
@@ -458,8 +458,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
         if (sd_version_is_sdxl(version)) {
             text_model2->compute(n_threads,
                                  input_ids2,
-                                 0,
-                                 NULL,
+                                 num_custom_embeddings,
+                                 token_embed_custom.data(),
                                  max_token_idx,
                                  false,
                                  &chunk_hidden_states2, work_ctx);
@@ -469,8 +469,8 @@
             if (chunk_idx == 0) {
                 text_model2->compute(n_threads,
                                      input_ids2,
-                                     0,
-                                     NULL,
+                                     num_custom_embeddings,
+                                     token_embed_custom.data(),
                                      max_token_idx,
                                      true,
                                      &pooled,
diff --git a/otherarch/sdcpp/model.cpp b/otherarch/sdcpp/model.cpp
index 9e62bdbcc..3ce7af730 100644
--- a/otherarch/sdcpp/model.cpp
+++ b/otherarch/sdcpp/model.cpp
@@ -191,6 +191,64 @@ std::unordered_map<std::string, std::string> pmid_v2_name_map = {
 std::string convert_open_clip_to_hf_clip(const std::string& name) {
     std::string new_name = name;
     std::string prefix;
+    if (contains(new_name, ".enc.")) {
+        // llama.cpp naming convention for T5
+        size_t pos = new_name.find(".enc.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 5, ".encoder.");
+        }
+        pos = new_name.find("blk.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 4, "block.");
+        }
+        pos = new_name.find("output_norm.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 12, "final_layer_norm.");
+        }
+        pos = new_name.find("attn_k.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 7, "layer.0.SelfAttention.k.");
+        }
+        pos = new_name.find("attn_v.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 7, "layer.0.SelfAttention.v.");
+        }
+        pos = new_name.find("attn_o.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 7, "layer.0.SelfAttention.o.");
+        }
+        pos = new_name.find("attn_q.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 7, "layer.0.SelfAttention.q.");
+        }
+        pos = new_name.find("attn_norm.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 10, "layer.0.layer_norm.");
+        }
+        pos = new_name.find("ffn_norm.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 9, "layer.1.layer_norm.");
+        }
+        pos = new_name.find("ffn_up.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 7, "layer.1.DenseReluDense.wi_1.");
+        }
+        pos = new_name.find("ffn_down.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 9, "layer.1.DenseReluDense.wo.");
+        }
+        pos = new_name.find("ffn_gate.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 9, "layer.1.DenseReluDense.wi_0.");
+        }
+        pos = new_name.find("attn_rel_b.");
+        if (pos != std::string::npos) {
+            new_name.replace(pos, 11, "layer.0.SelfAttention.relative_attention_bias.");
+        }
+    } else if (name == "text_encoders.t5xxl.transformer.token_embd.weight") {
+        new_name = "text_encoders.t5xxl.transformer.shared.weight";
+    }
+
     if (starts_with(new_name, "conditioner.embedders.0.open_clip.")) {
         prefix = "cond_stage_model.";
         new_name = new_name.substr(strlen("conditioner.embedders.0.open_clip."));
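
Note (not part of the patch): the model.cpp hunk above translates llama.cpp's GGUF T5 tensor names (enc/blk/attn_*/ffn_*) into the Hugging Face layout (encoder/block/SelfAttention/DenseReluDense) before the rest of convert_open_clip_to_hf_clip runs. Below is a minimal standalone sketch of that translation, assuming only the C++17 standard library; map_t5_name, replace_first, and the sample tensor names are illustrative, not identifiers from the repo.

// Standalone sketch of the llama.cpp -> HF T5 tensor-name mapping that the
// model.cpp hunk adds. Illustrative only; the real code lives inline in
// convert_open_clip_to_hf_clip.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Replace the first occurrence of `from` with `to`, mirroring the patch's
// find/replace pairs (from.size() yields the same lengths the patch
// hard-codes, e.g. 5 for ".enc.").
static void replace_first(std::string& s, const std::string& from, const std::string& to) {
    size_t pos = s.find(from);
    if (pos != std::string::npos) {
        s.replace(pos, from.size(), to);
    }
}

static std::string map_t5_name(std::string name) {
    if (name.find(".enc.") == std::string::npos) {
        // The patch handles the shared token embedding as a special case.
        if (name == "text_encoders.t5xxl.transformer.token_embd.weight") {
            return "text_encoders.t5xxl.transformer.shared.weight";
        }
        return name;  // only llama.cpp-style T5 encoder tensors are rewritten
    }
    // Same substitutions, applied in the same order as the patch.
    const std::vector<std::pair<std::string, std::string>> subs = {
        {".enc.", ".encoder."},
        {"blk.", "block."},
        {"output_norm.", "final_layer_norm."},
        {"attn_k.", "layer.0.SelfAttention.k."},
        {"attn_v.", "layer.0.SelfAttention.v."},
        {"attn_o.", "layer.0.SelfAttention.o."},
        {"attn_q.", "layer.0.SelfAttention.q."},
        {"attn_norm.", "layer.0.layer_norm."},
        {"ffn_norm.", "layer.1.layer_norm."},
        {"ffn_up.", "layer.1.DenseReluDense.wi_1."},
        {"ffn_down.", "layer.1.DenseReluDense.wo."},
        {"ffn_gate.", "layer.1.DenseReluDense.wi_0."},
        {"attn_rel_b.", "layer.0.SelfAttention.relative_attention_bias."},
    };
    for (const auto& [from, to] : subs) {
        replace_first(name, from, to);
    }
    return name;
}

int main() {
    // Hypothetical GGUF tensor names in llama.cpp's T5 convention.
    for (const std::string& n : {
             std::string("text_encoders.t5xxl.transformer.enc.blk.0.attn_q.weight"),
             std::string("text_encoders.t5xxl.transformer.enc.blk.0.ffn_up.weight"),
         }) {
        std::cout << n << " -> " << map_t5_name(n) << "\n";
    }
    // e.g. ...enc.blk.0.attn_q.weight ->
    //      ...encoder.block.0.layer.0.SelfAttention.q.weight
}

Driving the rewrites from a table of (pattern, replacement) pairs keeps the substitution order explicit and derives each replace length from the pattern itself, while performing exactly the same mapping as the hunk's unrolled find/replace chain.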