Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
some cleanup before starting on TTS
parent 93b2bebc2f
commit bd38665e1f
3 changed files with 16 additions and 15 deletions
@@ -397,7 +397,10 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
             case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
             case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
             case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
-            default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
+            default:
+                printf("\nUnsupported tensor size encountered! Will use %s for %s\n", ggml_type_name(tensor->type), name.c_str());
+                new_type = tensor->type;
+                break;
         }
         if (tensor->ne[0] % ggml_blck_size(new_type) != 0) {
             new_type = GGML_TYPE_F16;
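The change above replaces the hard failure for unmapped tensor types with a warning plus a fallback: the tensor keeps its current type, and the existing block-size check below it still downgrades to F16 when the row width is not a multiple of the chosen type's block size. The following is a minimal, self-contained C++ sketch of that decision flow, not the real llama.cpp code; the fake_* names, the block-size table, and the pick_type helper are illustrative stand-ins for ggml_blck_size, ggml_type_name, and llama_tensor_get_type.

// Sketch only: illustrates "warn and keep the type" instead of throwing,
// followed by the divisibility fallback to F16. All names and values here
// are hypothetical, not the real ggml/llama.cpp tables.
#include <cstdio>
#include <cstdint>

enum fake_type { FAKE_F16, FAKE_Q4_K, FAKE_Q5_0, FAKE_IQ_SOMETHING };

// Hypothetical per-type block sizes (analogue of ggml_blck_size).
static int fake_blck_size(fake_type t) {
    switch (t) {
        case FAKE_F16:  return 1;
        case FAKE_Q5_0: return 32;
        case FAKE_Q4_K: return 256;
        default:        return 256;
    }
}

static const char * fake_type_name(fake_type t) {
    switch (t) {
        case FAKE_F16:  return "f16";
        case FAKE_Q5_0: return "q5_0";
        case FAKE_Q4_K: return "q4_k";
        default:        return "unknown";
    }
}

// Pick a target type for a tensor whose first dimension has ne0 elements.
static fake_type pick_type(fake_type cur, int64_t ne0, const char * name) {
    fake_type new_type;
    switch (cur) {
        case FAKE_Q4_K: new_type = FAKE_Q5_0; break;
        default:
            // Old behaviour: throw std::runtime_error.
            // New behaviour: warn and keep the tensor's current type.
            printf("Unsupported tensor type! Will use %s for %s\n",
                   fake_type_name(cur), name);
            new_type = cur;
            break;
    }
    // Row width must be a multiple of the chosen type's block size,
    // otherwise fall back to F16 (block size 1).
    if (ne0 % fake_blck_size(new_type) != 0) {
        new_type = FAKE_F16;
    }
    return new_type;
}

int main() {
    // 4096 is divisible by 32, so Q4_K maps cleanly to Q5_0.
    printf("-> %s\n", fake_type_name(pick_type(FAKE_Q4_K, 4096, "blk.0.attn_v.weight")));
    // An unmapped type with an awkward row width ends up as F16.
    printf("-> %s\n", fake_type_name(pick_type(FAKE_IQ_SOMETHING, 1000, "output.weight")));
    return 0;
}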
@@ -629,7 +632,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         if (llama_model_has_encoder(&model)) {
             n_attn_layer *= 3;
         }
-        GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
+        GGML_ASSERT_CONTINUE((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
    }

    size_t total_size_org = 0;
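The second hunk swaps GGML_ASSERT for GGML_ASSERT_CONTINUE, so a mismatched attention-layer count is reported instead of aborting quantization. GGML_ASSERT_CONTINUE is a koboldcpp-side helper whose exact definition is not shown in this diff; the sketch below only illustrates the intended behavioural difference, using hypothetical MY_ASSERT and MY_ASSERT_CONTINUE macros.

// Sketch only: a fatal assert versus a "log and keep going" variant.
// The real GGML_ASSERT / GGML_ASSERT_CONTINUE macros may differ.
#include <cstdio>
#include <cstdlib>

// Fatal: print the failed condition and abort the process.
#define MY_ASSERT(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "assert failed: %s (%s:%d)\n", #x, __FILE__, __LINE__); \
            abort(); \
        } \
    } while (0)

// Non-fatal: same check, but only logs a warning and lets execution continue.
#define MY_ASSERT_CONTINUE(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "warning, assert failed (continuing): %s (%s:%d)\n", #x, __FILE__, __LINE__); \
        } \
    } while (0)

int main() {
    int n_attention_wv = 31;   // illustrative values only
    int n_attn_layer   = 32;

    MY_ASSERT(n_attn_layer > 0);  // passes, nothing happens

    // With the non-fatal variant, a mismatched layer count is reported but
    // the surrounding quantization-style work can keep going.
    MY_ASSERT_CONTINUE((n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
    printf("still running after the failed check\n");
    return 0;
}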