mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 02:19:41 +00:00
updated sdcpp, prepare for inpaint
fixed img2img (+1 squashed commits) Squashed commits: [42c48f14] try to update sdcpp, feels kind of buggy
This commit is contained in:
parent
ebf924c5d1
commit
fea3b2bd4a
18 changed files with 1850 additions and 271 deletions
|
@ -51,7 +51,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
|
||||
std::string trigger_word = "img"; // should be user settable
|
||||
std::string embd_dir;
|
||||
int32_t num_custom_embeddings = 0;
|
||||
int32_t num_custom_embeddings = 0;
|
||||
int32_t num_custom_embeddings_2 = 0;
|
||||
std::vector<uint8_t> token_embed_custom;
|
||||
std::vector<std::string> readed_embeddings;
|
||||
|
||||
|
@ -61,18 +62,18 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
SDVersion version = VERSION_SD1,
|
||||
PMVersion pv = PM_VERSION_1,
|
||||
int clip_skip = -1)
|
||||
: version(version), pm_version(pv), tokenizer(version == VERSION_SD2 ? 0 : 49407), embd_dir(embd_dir) {
|
||||
: version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407), embd_dir(embd_dir) {
|
||||
if (clip_skip <= 0) {
|
||||
clip_skip = 1;
|
||||
if (version == VERSION_SD2 || version == VERSION_SDXL) {
|
||||
if (sd_version_is_sd2(version) || sd_version_is_sdxl(version)) {
|
||||
clip_skip = 2;
|
||||
}
|
||||
}
|
||||
if (version == VERSION_SD1) {
|
||||
if (sd_version_is_sd1(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip);
|
||||
} else if (version == VERSION_SD2) {
|
||||
} else if (sd_version_is_sd2(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, clip_skip);
|
||||
} else if (version == VERSION_SDXL) {
|
||||
} else if (sd_version_is_sdxl(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
|
||||
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
|
||||
}
|
||||
|
@ -80,35 +81,35 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
|
||||
void set_clip_skip(int clip_skip) {
|
||||
text_model->set_clip_skip(clip_skip);
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->set_clip_skip(clip_skip);
|
||||
}
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
||||
text_model->get_param_tensors(tensors, "cond_stage_model.transformer.text_model");
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->get_param_tensors(tensors, "cond_stage_model.1.transformer.text_model");
|
||||
}
|
||||
}
|
||||
|
||||
void alloc_params_buffer() {
|
||||
text_model->alloc_params_buffer();
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->alloc_params_buffer();
|
||||
}
|
||||
}
|
||||
|
||||
void free_params_buffer() {
|
||||
text_model->free_params_buffer();
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->free_params_buffer();
|
||||
}
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t buffer_size = text_model->get_params_buffer_size();
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
buffer_size += text_model2->get_params_buffer_size();
|
||||
}
|
||||
return buffer_size;
|
||||
|
@ -131,28 +132,55 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
params.no_alloc = false;
|
||||
struct ggml_context* embd_ctx = ggml_init(params);
|
||||
struct ggml_tensor* embd = NULL;
|
||||
int64_t hidden_size = text_model->model.hidden_size;
|
||||
struct ggml_tensor* embd2 = NULL;
|
||||
auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) {
|
||||
if (tensor_storage.ne[0] != hidden_size) {
|
||||
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], hidden_size);
|
||||
return false;
|
||||
if (tensor_storage.ne[0] != text_model->model.hidden_size) {
|
||||
if (text_model2) {
|
||||
if (tensor_storage.ne[0] == text_model2->model.hidden_size) {
|
||||
embd2 = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, text_model2->model.hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
|
||||
*dst_tensor = embd2;
|
||||
} else {
|
||||
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i or %i", tensor_storage.ne[0], text_model->model.hidden_size, text_model2->model.hidden_size);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], text_model->model.hidden_size);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
embd = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, text_model->model.hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
|
||||
*dst_tensor = embd;
|
||||
}
|
||||
embd = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
|
||||
*dst_tensor = embd;
|
||||
return true;
|
||||
};
|
||||
model_loader.load_tensors(on_load, NULL);
|
||||
readed_embeddings.push_back(embd_name);
|
||||
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd));
|
||||
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(embd->type)),
|
||||
embd->data,
|
||||
ggml_nbytes(embd));
|
||||
for (int i = 0; i < embd->ne[1]; i++) {
|
||||
bpe_tokens.push_back(text_model->model.vocab_size + num_custom_embeddings);
|
||||
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
|
||||
num_custom_embeddings++;
|
||||
if (embd) {
|
||||
int64_t hidden_size = text_model->model.hidden_size;
|
||||
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd));
|
||||
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(embd->type)),
|
||||
embd->data,
|
||||
ggml_nbytes(embd));
|
||||
for (int i = 0; i < embd->ne[1]; i++) {
|
||||
bpe_tokens.push_back(text_model->model.vocab_size + num_custom_embeddings);
|
||||
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
|
||||
num_custom_embeddings++;
|
||||
}
|
||||
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i", embd_name.c_str(), num_custom_embeddings);
|
||||
}
|
||||
if (embd2) {
|
||||
int64_t hidden_size = text_model2->model.hidden_size;
|
||||
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd2));
|
||||
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings_2 * hidden_size * ggml_type_size(embd2->type)),
|
||||
embd2->data,
|
||||
ggml_nbytes(embd2));
|
||||
for (int i = 0; i < embd2->ne[1]; i++) {
|
||||
bpe_tokens.push_back(text_model2->model.vocab_size + num_custom_embeddings_2);
|
||||
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
|
||||
num_custom_embeddings_2++;
|
||||
}
|
||||
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i (text model 2)", embd_name.c_str(), num_custom_embeddings_2);
|
||||
}
|
||||
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i", embd_name.c_str(), num_custom_embeddings);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -402,7 +430,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
auto input_ids = vector_to_ggml_tensor_i32(work_ctx, chunk_tokens);
|
||||
struct ggml_tensor* input_ids2 = NULL;
|
||||
size_t max_token_idx = 0;
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
auto it = std::find(chunk_tokens.begin(), chunk_tokens.end(), tokenizer.EOS_TOKEN_ID);
|
||||
if (it != chunk_tokens.end()) {
|
||||
std::fill(std::next(it), chunk_tokens.end(), 0);
|
||||
|
@ -427,7 +455,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
false,
|
||||
&chunk_hidden_states1,
|
||||
work_ctx);
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->compute(n_threads,
|
||||
input_ids2,
|
||||
0,
|
||||
|
@ -486,7 +514,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||
ggml_nelements(hidden_states) / chunk_hidden_states->ne[0]);
|
||||
|
||||
ggml_tensor* vec = NULL;
|
||||
if (version == VERSION_SDXL) {
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
int out_dim = 256;
|
||||
vec = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, adm_in_channels);
|
||||
// [0:1280]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue