mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
merge base support for chroma, however it's not working correctly
This commit is contained in:
parent dcf88d6e78
commit 30cf433ab4
5 changed files with 554 additions and 105 deletions
@@ -458,8 +458,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
            if (sd_version_is_sdxl(version)) {
                text_model2->compute(n_threads,
                                     input_ids2,
                                     num_custom_embeddings,
                                     token_embed_custom.data(),
                                     0,
                                     NULL,
                                     max_token_idx,
                                     false,
                                     &chunk_hidden_states2, work_ctx);
@@ -469,8 +469,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
                if (chunk_idx == 0) {
                    text_model2->compute(n_threads,
                                         input_ids2,
                                         num_custom_embeddings,
                                         token_embed_custom.data(),
                                         0,
                                         NULL,
                                         max_token_idx,
                                         true,
                                         &pooled,
@@ -747,7 +747,7 @@ struct SD3CLIPEmbedder : public Conditioner {
        clip_l_tokenizer.pad_tokens(clip_l_tokens, clip_l_weights, max_length, padding);
        clip_g_tokenizer.pad_tokens(clip_g_tokens, clip_g_weights, max_length, padding);
-       t5_tokenizer.pad_tokens(t5_tokens, t5_weights, max_length, padding);
+       t5_tokenizer.pad_tokens(t5_tokens, t5_weights, NULL, max_length, padding);

        // for (int i = 0; i < clip_l_tokens.size(); i++) {
        //     std::cout << clip_l_tokens[i] << ":" << clip_l_weights[i] << ", ";
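Note: in both the SD3 and Flux embedders, the T5 pad_tokens call gains an extra argument before max_length. Judging by the new PixArtCLIPEmbedder further down, this appears to be an optional output pointer for an attention-mask vector; the existing callers pass NULL because they do not use the mask. A hedged sketch of that padding contract follows the PixArtCLIPEmbedder tokenizer below.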
@@ -1004,6 +1004,7 @@ struct FluxCLIPEmbedder : public Conditioner {
    T5UniGramTokenizer t5_tokenizer;
    std::shared_ptr<CLIPTextModelRunner> clip_l;
    std::shared_ptr<T5Runner> t5;
+   size_t chunk_len = 256;

    FluxCLIPEmbedder(ggml_backend_t backend,
                     std::map<std::string, enum ggml_type>& tensor_types,
@@ -1077,7 +1078,7 @@ struct FluxCLIPEmbedder : public Conditioner {
        }

        clip_l_tokenizer.pad_tokens(clip_l_tokens, clip_l_weights, 77, padding);
-       t5_tokenizer.pad_tokens(t5_tokens, t5_weights, max_length, padding);
+       t5_tokenizer.pad_tokens(t5_tokens, t5_weights, NULL, max_length, padding);

        // for (int i = 0; i < clip_l_tokens.size(); i++) {
        //     std::cout << clip_l_tokens[i] << ":" << clip_l_weights[i] << ", ";
@@ -1109,7 +1110,6 @@ struct FluxCLIPEmbedder : public Conditioner {
        struct ggml_tensor* pooled = NULL;  // [768,]
        std::vector<float> hidden_states_vec;

-       size_t chunk_len = 256;
        size_t chunk_count = t5_tokens.size() / chunk_len;
        for (int chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
            // clip_l
@@ -1196,7 +1196,226 @@ struct FluxCLIPEmbedder : public Conditioner {
                                      int height,
                                      int adm_in_channels = -1,
                                      bool force_zero_embeddings = false) {
-       auto tokens_and_weights = tokenize(text, 256, true);
+       auto tokens_and_weights = tokenize(text, chunk_len, true);
        return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, force_zero_embeddings);
    }

    std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(ggml_context* work_ctx,
                                                                                  int n_threads,
                                                                                  const std::string& text,
                                                                                  int clip_skip,
                                                                                  int width,
                                                                                  int height,
                                                                                  int num_input_imgs,
                                                                                  int adm_in_channels = -1,
                                                                                  bool force_zero_embeddings = false) {
        GGML_ASSERT(0 && "Not implemented yet!");
    }

    std::string remove_trigger_from_prompt(ggml_context* work_ctx,
                                           const std::string& prompt) {
        GGML_ASSERT(0 && "Not implemented yet!");
    }
};

struct PixArtCLIPEmbedder : public Conditioner {
    T5UniGramTokenizer t5_tokenizer;
    std::shared_ptr<T5Runner> t5;
    size_t chunk_len = 512;

    PixArtCLIPEmbedder(ggml_backend_t backend,
                       std::map<std::string, enum ggml_type>& tensor_types,
                       int clip_skip = -1) {
        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
    }

    void set_clip_skip(int clip_skip) {
    }

    void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
        t5->get_param_tensors(tensors, "text_encoders.t5xxl.transformer");
    }

    void alloc_params_buffer() {
        t5->alloc_params_buffer();
    }

    void free_params_buffer() {
        t5->free_params_buffer();
    }

    size_t get_params_buffer_size() {
        size_t buffer_size = 0;

        buffer_size += t5->get_params_buffer_size();

        return buffer_size;
    }

    std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text,
                                                                                  size_t max_length = 0,
                                                                                  bool padding = false) {
        auto parsed_attention = parse_prompt_attention(text);

        {
            std::stringstream ss;
            ss << "[";
            for (const auto& item : parsed_attention) {
                ss << "['" << item.first << "', " << item.second << "], ";
            }
            ss << "]";
            LOG_DEBUG("parse '%s' to %s", text.c_str(), ss.str().c_str());
        }

        auto on_new_token_cb = [&](std::string& str, std::vector<int32_t>& bpe_tokens) -> bool {
            return false;
        };

        std::vector<int> t5_tokens;
        std::vector<float> t5_weights;
        std::vector<float> t5_mask;  // attention-mask vector, filled in by pad_tokens below
        for (const auto& item : parsed_attention) {
            const std::string& curr_text = item.first;
            float curr_weight            = item.second;

            std::vector<int> curr_tokens = t5_tokenizer.Encode(curr_text, true);
            t5_tokens.insert(t5_tokens.end(), curr_tokens.begin(), curr_tokens.end());
            t5_weights.insert(t5_weights.end(), curr_tokens.size(), curr_weight);
        }

        t5_tokenizer.pad_tokens(t5_tokens, t5_weights, &t5_mask, max_length, padding);

        return {t5_tokens, t5_weights, t5_mask};
    }

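For orientation, below is a minimal sketch of what the mask-aware padding appears to do, inferred from how its output is consumed in get_learned_condition_common and modify_mask_to_attend_padding further down. The name pad_tokens_sketch and the pad_id parameter are placeholders; the project's real T5UniGramTokenizer::pad_tokens lives elsewhere in the codebase and may differ in details.

    // Hedged sketch (placeholder names, not the project's real implementation):
    // the apparent contract of the new mask-aware pad_tokens.
    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    static void pad_tokens_sketch(std::vector<int>& tokens,
                                  std::vector<float>& weights,
                                  std::vector<float>* mask,  // optional; callers that don't need it pass NULL
                                  size_t max_length,
                                  bool padding,
                                  int pad_id = 0 /* placeholder pad token id */) {
        if (max_length == 0 || !padding) {
            return;  // nothing to pad
        }
        size_t n_real = std::min(tokens.size(), max_length);
        tokens.resize(max_length, pad_id);  // truncation/chunking of long prompts omitted here
        weights.resize(max_length, 1.0f);   // padding tokens get neutral weight
        if (mask != NULL) {
            // additive attention mask: 0 for real tokens, -inf for padding,
            // matching the std::isinf() check in modify_mask_to_attend_padding
            mask->assign(max_length, -INFINITY);
            std::fill(mask->begin(), mask->begin() + n_real, 0.0f);
        }
    }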
    void modify_mask_to_attend_padding(struct ggml_tensor* mask, int max_seq_length, int num_extra_padding = 8) {
        float* mask_data = (float*)mask->data;
        int num_pad      = 0;
        for (int64_t i = 0; i < max_seq_length; i++) {
            if (num_pad >= num_extra_padding) {
                break;
            }
            if (std::isinf(mask_data[i])) {
                // flip a masked (-inf) padding position back to 0 so it stays attendable
                mask_data[i] = 0;
                ++num_pad;
            }
        }
        // LOG_DEBUG("PAD: %d", num_pad);
    }

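The helper above walks the additive mask and flips up to num_extra_padding entries from -inf back to 0, leaving that many padding positions attendable. A tiny standalone illustration of the effect, assuming the same 0 / -inf convention:

    // Toy illustration (not project code): un-masking one extra padding position.
    #include <cmath>
    #include <cstdio>

    int main() {
        // 4 real tokens followed by 4 masked padding positions
        float mask[8]         = {0, 0, 0, 0, -INFINITY, -INFINITY, -INFINITY, -INFINITY};
        int num_extra_padding = 1;  // corresponds to the default mask_pad = 1 below

        int num_pad = 0;
        for (int i = 0; i < 8 && num_pad < num_extra_padding; i++) {
            if (std::isinf(mask[i])) {
                mask[i] = 0;  // this padding position becomes attendable
                ++num_pad;
            }
        }
        for (int i = 0; i < 8; i++) {
            std::printf("%s ", std::isinf(mask[i]) ? "-inf" : "0");
        }
        std::printf("\n");  // prints: 0 0 0 0 0 -inf -inf -inf
        return 0;
    }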
    SDCondition get_learned_condition_common(ggml_context* work_ctx,
                                             int n_threads,
                                             std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> token_and_weights,
                                             int clip_skip,
                                             bool force_zero_embeddings = false) {
        auto& t5_tokens        = std::get<0>(token_and_weights);
        auto& t5_weights       = std::get<1>(token_and_weights);
        auto& t5_attn_mask_vec = std::get<2>(token_and_weights);

        int64_t t0                              = ggml_time_ms();
        struct ggml_tensor* hidden_states       = NULL;  // [N, n_token, 4096]
        struct ggml_tensor* chunk_hidden_states = NULL;  // [n_token, 4096]
        struct ggml_tensor* pooled              = NULL;  // [768,]
        struct ggml_tensor* t5_attn_mask        = vector_to_ggml_tensor(work_ctx, t5_attn_mask_vec);  // [n_token,]

        std::vector<float> hidden_states_vec;

        size_t chunk_count = t5_tokens.size() / chunk_len;

        // the T5 attention mask can be switched off via the SD_CHROMA_USE_T5_MASK environment variable
        bool use_mask                     = true;
        const char* SD_CHROMA_USE_T5_MASK = getenv("SD_CHROMA_USE_T5_MASK");
        if (SD_CHROMA_USE_T5_MASK != nullptr) {
            std::string sd_chroma_use_t5_mask_str = SD_CHROMA_USE_T5_MASK;
            if (sd_chroma_use_t5_mask_str == "OFF" || sd_chroma_use_t5_mask_str == "FALSE") {
                use_mask = false;
            } else if (sd_chroma_use_t5_mask_str != "ON" && sd_chroma_use_t5_mask_str != "TRUE") {
                LOG_WARN("SD_CHROMA_USE_T5_MASK environment variable has unexpected value. Assuming default (\"ON\"). (Expected \"ON\"/\"TRUE\" or \"OFF\"/\"FALSE\", got \"%s\")", SD_CHROMA_USE_T5_MASK);
            }
        }
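In effect, the mask can be toggled at run time: setting SD_CHROMA_USE_T5_MASK=OFF (or FALSE) in the environment disables it, ON/TRUE keeps the default behaviour, and any other value keeps the default and logs the warning above.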
        for (int chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
            // t5
            std::vector<int> chunk_tokens(t5_tokens.begin() + chunk_idx * chunk_len,
                                          t5_tokens.begin() + (chunk_idx + 1) * chunk_len);
            std::vector<float> chunk_weights(t5_weights.begin() + chunk_idx * chunk_len,
                                             t5_weights.begin() + (chunk_idx + 1) * chunk_len);
            std::vector<float> chunk_mask(t5_attn_mask_vec.begin() + chunk_idx * chunk_len,
                                          t5_attn_mask_vec.begin() + (chunk_idx + 1) * chunk_len);

            auto input_ids          = vector_to_ggml_tensor_i32(work_ctx, chunk_tokens);
            auto t5_attn_mask_chunk = use_mask ? vector_to_ggml_tensor(work_ctx, chunk_mask) : NULL;

            t5->compute(n_threads,
                        input_ids,
                        &chunk_hidden_states,
                        work_ctx,
                        t5_attn_mask_chunk);
            {
                // apply prompt weights: scale each token's hidden states by its weight,
                // then renormalize so the overall mean activation is unchanged
                auto tensor         = chunk_hidden_states;
                float original_mean = ggml_tensor_mean(tensor);
                for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
                    for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
                        for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
                            float value = ggml_tensor_get_f32(tensor, i0, i1, i2);
                            value *= chunk_weights[i1];
                            ggml_tensor_set_f32(tensor, value, i0, i1, i2);
                        }
                    }
                }
                float new_mean = ggml_tensor_mean(tensor);
                ggml_tensor_scale(tensor, (original_mean / new_mean));
            }
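The weighting block follows the same scheme as the other embedders: every value in token i1's hidden-state row is multiplied by that token's prompt weight, then the whole chunk is rescaled by original_mean / new_mean so the overall activation magnitude stays roughly where it was before weighting.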
            int64_t t1 = ggml_time_ms();
            LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
            if (force_zero_embeddings) {
                float* vec = (float*)chunk_hidden_states->data;
                for (int i = 0; i < ggml_nelements(chunk_hidden_states); i++) {
                    vec[i] = 0;
                }
            }

            hidden_states_vec.insert(hidden_states_vec.end(),
                                     (float*)chunk_hidden_states->data,
                                     ((float*)chunk_hidden_states->data) + ggml_nelements(chunk_hidden_states));
        }

        if (hidden_states_vec.size() > 0) {
            hidden_states = vector_to_ggml_tensor(work_ctx, hidden_states_vec);
            hidden_states = ggml_reshape_2d(work_ctx,
                                            hidden_states,
                                            chunk_hidden_states->ne[0],
                                            ggml_nelements(hidden_states) / chunk_hidden_states->ne[0]);
        } else {
            hidden_states = ggml_new_tensor_2d(work_ctx, GGML_TYPE_F32, 4096, 256);
            ggml_set_f32(hidden_states, 0.f);
        }

        // the number of padding positions left attendable can be overridden
        // via the SD_CHROMA_MASK_PAD_OVERRIDE environment variable
        int mask_pad                            = 1;
        const char* SD_CHROMA_MASK_PAD_OVERRIDE = getenv("SD_CHROMA_MASK_PAD_OVERRIDE");
        if (SD_CHROMA_MASK_PAD_OVERRIDE != nullptr) {
            std::string mask_pad_str = SD_CHROMA_MASK_PAD_OVERRIDE;
            try {
                mask_pad = std::stoi(mask_pad_str);
            } catch (const std::invalid_argument&) {
                LOG_WARN("SD_CHROMA_MASK_PAD_OVERRIDE environment variable is not a valid integer (%s). Falling back to default (%d)", SD_CHROMA_MASK_PAD_OVERRIDE, mask_pad);
            } catch (const std::out_of_range&) {
                LOG_WARN("SD_CHROMA_MASK_PAD_OVERRIDE environment variable value is out of range for `int` type (%s). Falling back to default (%d)", SD_CHROMA_MASK_PAD_OVERRIDE, mask_pad);
            }
        }
        modify_mask_to_attend_padding(t5_attn_mask, ggml_nelements(t5_attn_mask), mask_pad);
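Similarly, SD_CHROMA_MASK_PAD_OVERRIDE accepts an integer override for the number of padding positions left attendable (default 1); unparsable or out-of-range values fall back to the default with a warning. Note that this adjustment is applied to the full, un-chunked t5_attn_mask returned with the condition, not to the per-chunk masks that were passed to t5->compute above.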

        return SDCondition(hidden_states, t5_attn_mask, NULL);
    }
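A hedged observation: the mask is returned in the SDCondition slot that the other embedders appear to use for their pooled/vector conditioning (PixArtCLIPEmbedder never fills pooled), so downstream Chroma code presumably has to treat that slot as an attention mask rather than a pooled embedding.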

    SDCondition get_learned_condition(ggml_context* work_ctx,
                                      int n_threads,
                                      const std::string& text,
                                      int clip_skip,
                                      int width,
                                      int height,
                                      int adm_in_channels = -1,
                                      bool force_zero_embeddings = false) {
        auto tokens_and_weights = tokenize(text, chunk_len, true);
        return get_learned_condition_common(work_ctx, n_threads, tokens_and_weights, clip_skip, force_zero_embeddings);
    }