sync with sd.cpp

This commit is contained in:
Concedo 2025-06-30 00:10:51 +08:00
parent e5af9b5ea9
commit 186227fc26
8 changed files with 234 additions and 82 deletions

View file

@ -597,7 +597,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
GGML_ASSERT(it != tokens.end()); // prompt must have trigger word
tokens.erase(it);
return decode(tokens);
//return prompt; //kcpp we don't care about photomaker trigger words
}
SDCondition get_learned_condition(ggml_context* work_ctx,
@ -903,6 +902,7 @@ struct SD3CLIPEmbedder : public Conditioner {
t5->compute(n_threads,
input_ids,
NULL,
&chunk_hidden_states_t5,
work_ctx);
{
@ -1148,6 +1148,7 @@ struct FluxCLIPEmbedder : public Conditioner {
t5->compute(n_threads,
input_ids,
NULL,
&chunk_hidden_states,
work_ctx);
{
@ -1223,10 +1224,15 @@ struct PixArtCLIPEmbedder : public Conditioner {
T5UniGramTokenizer t5_tokenizer;
std::shared_ptr<T5Runner> t5;
size_t chunk_len = 512;
bool use_mask = false;
int mask_pad = 1;
PixArtCLIPEmbedder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
int clip_skip = -1) {
int clip_skip = -1,
bool use_mask = false,
int mask_pad = 1)
: use_mask(use_mask), mask_pad(mask_pad) {
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
}
@ -1323,16 +1329,6 @@ struct PixArtCLIPEmbedder : public Conditioner {
size_t chunk_count = t5_tokens.size() / chunk_len;
bool use_mask = false;
const char* SD_CHROMA_USE_T5_MASK = getenv("SD_CHROMA_USE_T5_MASK");
if (SD_CHROMA_USE_T5_MASK != nullptr) {
std::string sd_chroma_use_t5_mask_str = SD_CHROMA_USE_T5_MASK;
if (sd_chroma_use_t5_mask_str == "ON" || sd_chroma_use_t5_mask_str == "TRUE") {
use_mask = true;
} else if (sd_chroma_use_t5_mask_str != "OFF" && sd_chroma_use_t5_mask_str != "FALSE") {
LOG_WARN("SD_CHROMA_USE_T5_MASK environment variable has unexpected value. Assuming default (\"OFF\"). (Expected \"OFF\"/\"FALSE\" or\"ON\"/\"TRUE\", got \"%s\")", SD_CHROMA_USE_T5_MASK);
}
}
for (int chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
// t5
std::vector<int> chunk_tokens(t5_tokens.begin() + chunk_idx * chunk_len,
@ -1347,9 +1343,9 @@ struct PixArtCLIPEmbedder : public Conditioner {
t5->compute(n_threads,
input_ids,
t5_attn_mask_chunk,
&chunk_hidden_states,
work_ctx,
t5_attn_mask_chunk);
work_ctx);
{
auto tensor = chunk_hidden_states;
float original_mean = ggml_tensor_mean(tensor);
@ -1391,18 +1387,6 @@ struct PixArtCLIPEmbedder : public Conditioner {
ggml_set_f32(hidden_states, 0.f);
}
int mask_pad = 1;
const char* SD_CHROMA_MASK_PAD_OVERRIDE = getenv("SD_CHROMA_MASK_PAD_OVERRIDE");
if (SD_CHROMA_MASK_PAD_OVERRIDE != nullptr) {
std::string mask_pad_str = SD_CHROMA_MASK_PAD_OVERRIDE;
try {
mask_pad = std::stoi(mask_pad_str);
} catch (const std::invalid_argument&) {
LOG_WARN("SD_CHROMA_MASK_PAD_OVERRIDE environment variable is not a valid integer (%s). Falling back to default (%d)", SD_CHROMA_MASK_PAD_OVERRIDE, mask_pad);
} catch (const std::out_of_range&) {
LOG_WARN("SD_CHROMA_MASK_PAD_OVERRIDE environment variable value is out of range for `int` type (%s). Falling back to default (%d)", SD_CHROMA_MASK_PAD_OVERRIDE, mask_pad);
}
}
modify_mask_to_attend_padding(t5_attn_mask, ggml_nelements(t5_attn_mask), mask_pad);
return SDCondition(hidden_states, t5_attn_mask, NULL);