clip quantize skip problematic layer

This commit is contained in:
Concedo 2024-12-19 16:25:48 +08:00
parent ee486bad3e
commit fbf1345a66

View file

@ -2722,6 +2722,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
return true; return true;
} }
// kcpp: when true, clip_model_quantize leaves the tensor named
// "v.position_embd.weight" unquantized instead of converting it to the target type.
static bool avoid_problematic_indivisible = true;
bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) { bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) {
ggml_type type = GGML_TYPE_Q4_1; ggml_type type = GGML_TYPE_Q4_1;
@ -2782,6 +2783,15 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
// quantize only 2D tensors // quantize only 2D tensors
quantize &= (ggml_n_dims(cur) == 2); quantize &= (ggml_n_dims(cur) == 2);
//kcpp fix: skip quantization for specific tensors (matched by name) that are known to be problematic, e.g. because they are indivisible
if(avoid_problematic_indivisible)
{
if(name=="v.position_embd.weight")
{
quantize = false;
}
}
if (quantize) { if (quantize) {
new_type = type; new_type = type;
if (new_type >= GGML_TYPE_Q2_K && name.find("embd") != std::string::npos) { if (new_type >= GGML_TYPE_Q2_K && name.find("embd") != std::string::npos) {