Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
some cleanup before starting on TTS
parent 93b2bebc2f
commit bd38665e1f
3 changed files with 16 additions and 15 deletions
@@ -397,7 +397,10 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
             case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
             case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
             case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
-            default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
+            default:
+                printf("\nUnsupported tensor size encountered! Will use %s for %s\n", ggml_type_name(tensor->type), name.c_str());
+                new_type = tensor->type;
+                break;
         }
         if (tensor->ne[0] % ggml_blck_size(new_type) != 0) {
             new_type = GGML_TYPE_F16;
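The change above replaces the hard failure for unmapped tensor types with a warning plus a fallback: the tensor keeps its current type, and the existing block-size check below it still downgrades to F16 when the row width is not a multiple of the chosen type's block size. The following is a minimal, self-contained C++ sketch of that decision flow, not the real llama.cpp code; the fake_* names, the block-size table, and the pick_type helper are illustrative stand-ins for ggml_blck_size, ggml_type_name, and llama_tensor_get_type.

// Sketch only: illustrates "warn and keep the type" instead of throwing,
// followed by the divisibility fallback to F16. All names and values here
// are hypothetical, not the real ggml/llama.cpp tables.
#include <cstdio>
#include <cstdint>

enum fake_type { FAKE_F16, FAKE_Q4_K, FAKE_Q5_0, FAKE_IQ_SOMETHING };

// Hypothetical per-type block sizes (analogue of ggml_blck_size).
static int fake_blck_size(fake_type t) {
    switch (t) {
        case FAKE_F16:  return 1;
        case FAKE_Q5_0: return 32;
        case FAKE_Q4_K: return 256;
        default:        return 256;
    }
}

static const char * fake_type_name(fake_type t) {
    switch (t) {
        case FAKE_F16:  return "f16";
        case FAKE_Q5_0: return "q5_0";
        case FAKE_Q4_K: return "q4_k";
        default:        return "unknown";
    }
}

// Pick a target type for a tensor whose first dimension has ne0 elements.
static fake_type pick_type(fake_type cur, int64_t ne0, const char * name) {
    fake_type new_type;
    switch (cur) {
        case FAKE_Q4_K: new_type = FAKE_Q5_0; break;
        default:
            // Old behaviour: throw std::runtime_error.
            // New behaviour: warn and keep the tensor's current type.
            printf("Unsupported tensor type! Will use %s for %s\n",
                   fake_type_name(cur), name);
            new_type = cur;
            break;
    }
    // Row width must be a multiple of the chosen type's block size,
    // otherwise fall back to F16 (block size 1).
    if (ne0 % fake_blck_size(new_type) != 0) {
        new_type = FAKE_F16;
    }
    return new_type;
}

int main() {
    // 4096 is divisible by 32, so Q4_K maps cleanly to Q5_0.
    printf("-> %s\n", fake_type_name(pick_type(FAKE_Q4_K, 4096, "blk.0.attn_v.weight")));
    // An unmapped type with an awkward row width ends up as F16.
    printf("-> %s\n", fake_type_name(pick_type(FAKE_IQ_SOMETHING, 1000, "output.weight")));
    return 0;
}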
@@ -629,7 +632,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         if (llama_model_has_encoder(&model)) {
             n_attn_layer *= 3;
         }
-        GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
+        GGML_ASSERT_CONTINUE((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
    }

    size_t total_size_org = 0;
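The second hunk swaps GGML_ASSERT for GGML_ASSERT_CONTINUE, so a mismatched attention-layer count is reported instead of aborting quantization. GGML_ASSERT_CONTINUE is a koboldcpp-side helper whose exact definition is not shown in this diff; the sketch below only illustrates the intended behavioural difference, using hypothetical MY_ASSERT and MY_ASSERT_CONTINUE macros.

// Sketch only: a fatal assert versus a "log and keep going" variant.
// The real GGML_ASSERT / GGML_ASSERT_CONTINUE macros may differ.
#include <cstdio>
#include <cstdlib>

// Fatal: print the failed condition and abort the process.
#define MY_ASSERT(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "assert failed: %s (%s:%d)\n", #x, __FILE__, __LINE__); \
            abort(); \
        } \
    } while (0)

// Non-fatal: same check, but only logs a warning and lets execution continue.
#define MY_ASSERT_CONTINUE(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "warning, assert failed (continuing): %s (%s:%d)\n", #x, __FILE__, __LINE__); \
        } \
    } while (0)

int main() {
    int n_attention_wv = 31;   // illustrative values only
    int n_attn_layer   = 32;

    MY_ASSERT(n_attn_layer > 0);  // passes, nothing happens

    // With the non-fatal variant, a mismatched layer count is reported but
    // the surrounding quantization-style work can keep going.
    MY_ASSERT_CONTINUE((n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
    printf("still running after the failed check\n");
    return 0;
}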