mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-16 19:59:16 +00:00
use original precision for q3tts
This commit is contained in:
parent
2741d7e7bd
commit
eca9f4c1df
1 changed files with 15 additions and 10 deletions
|
|
@ -1265,8 +1265,9 @@ struct ggml_cgraph * TTSTransformer::build_prefill_forward_graph(int32_t n_token
|
|||
|
||||
cur = ggml_mul(ctx0, gate, up);
|
||||
|
||||
struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
cur = mul_mat(ctx0, ffn_down_f32, cur);
|
||||
// struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
// cur = mul_mat(ctx0, ffn_down_f32, cur);
|
||||
cur = mul_mat(ctx0, layer.ffn_down, cur);
|
||||
|
||||
inpL = ggml_add(ctx0, cur, inpFF);
|
||||
}
|
||||
|
|
@ -1410,8 +1411,9 @@ struct ggml_cgraph * TTSTransformer::build_step_graph(int32_t n_past) {
|
|||
|
||||
cur = ggml_mul(ctx0, gate, up);
|
||||
|
||||
struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
cur = mul_mat(ctx0, ffn_down_f32, cur);
|
||||
// struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
// cur = mul_mat(ctx0, ffn_down_f32, cur);
|
||||
cur = mul_mat(ctx0, layer.ffn_down, cur);
|
||||
|
||||
inpL = ggml_add(ctx0, cur, inpFF);
|
||||
}
|
||||
|
|
@ -1546,8 +1548,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_graph(int32_t n_prev_codes)
|
|||
|
||||
cur = ggml_mul(ctx0, gate, up);
|
||||
|
||||
struct ggml_tensor * old_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
cur = mul_mat(ctx0, old_ffn_down_f32, cur);
|
||||
// struct ggml_tensor * old_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
// cur = mul_mat(ctx0, old_ffn_down_f32, cur);
|
||||
cur = mul_mat(ctx0, layer.ffn_down, cur);
|
||||
|
||||
inpL = ggml_add(ctx0, cur, inpFF);
|
||||
}
|
||||
|
|
@ -1703,8 +1706,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_prefill_graph() {
|
|||
|
||||
cur = ggml_mul(ctx0, gate, up);
|
||||
|
||||
struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
cur = mul_mat(ctx0, ffn_down_f32, cur);
|
||||
// struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
// cur = mul_mat(ctx0, ffn_down_f32, cur);
|
||||
cur = mul_mat(ctx0, layer.ffn_down, cur);
|
||||
|
||||
inpL = ggml_add(ctx0, cur, inpFF);
|
||||
}
|
||||
|
|
@ -1875,8 +1879,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_step_graph(int32_t n_past,
|
|||
|
||||
cur = ggml_mul(ctx0, gate, up);
|
||||
|
||||
struct ggml_tensor * step_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
cur = mul_mat(ctx0, step_ffn_down_f32, cur);
|
||||
// struct ggml_tensor * step_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
|
||||
// cur = mul_mat(ctx0, step_ffn_down_f32, cur);
|
||||
cur = mul_mat(ctx0, layer.ffn_down, cur);
|
||||
|
||||
inpL = ggml_add(ctx0, cur, inpFF);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue