diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 174ef68ae..29a15086c 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -2722,6 +2722,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima return true; } +static bool avoid_problematic_indivisible = true; bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) { ggml_type type = GGML_TYPE_Q4_1; @@ -2782,6 +2783,15 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i // quantize only 2D tensors quantize &= (ggml_n_dims(cur) == 2); + //kcpp fix: while avoid_problematic_indivisible is set, never quantize the tensor named exactly "v.position_embd.weight" (matched by name only, no size check here; NOTE(review): presumably its dimensions are not divisible by the quantization block size - confirm) + if(avoid_problematic_indivisible) + { + if(name=="v.position_embd.weight") + { + quantize = false; + } + } + if (quantize) { new_type = type; if (new_type >= GGML_TYPE_Q2_K && name.find("embd") != std::string::npos) {