diff --git a/otherarch/qwen3tts/tts_transformer.cpp b/otherarch/qwen3tts/tts_transformer.cpp index 686322807..6cba48393 100644 --- a/otherarch/qwen3tts/tts_transformer.cpp +++ b/otherarch/qwen3tts/tts_transformer.cpp @@ -1265,8 +1265,9 @@ struct ggml_cgraph * TTSTransformer::build_prefill_forward_graph(int32_t n_token cur = ggml_mul(ctx0, gate, up); - struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); - cur = mul_mat(ctx0, ffn_down_f32, cur); + // struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); + // cur = mul_mat(ctx0, ffn_down_f32, cur); + cur = mul_mat(ctx0, layer.ffn_down, cur); inpL = ggml_add(ctx0, cur, inpFF); } @@ -1410,8 +1411,9 @@ struct ggml_cgraph * TTSTransformer::build_step_graph(int32_t n_past) { cur = ggml_mul(ctx0, gate, up); - struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); - cur = mul_mat(ctx0, ffn_down_f32, cur); + // struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); + // cur = mul_mat(ctx0, ffn_down_f32, cur); + cur = mul_mat(ctx0, layer.ffn_down, cur); inpL = ggml_add(ctx0, cur, inpFF); } @@ -1546,8 +1548,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_graph(int32_t n_prev_codes) cur = ggml_mul(ctx0, gate, up); - struct ggml_tensor * old_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); - cur = mul_mat(ctx0, old_ffn_down_f32, cur); + // struct ggml_tensor * old_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); + // cur = mul_mat(ctx0, old_ffn_down_f32, cur); + cur = mul_mat(ctx0, layer.ffn_down, cur); inpL = ggml_add(ctx0, cur, inpFF); } @@ -1703,8 +1706,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_prefill_graph() { cur = ggml_mul(ctx0, gate, up); - struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); - cur = mul_mat(ctx0, ffn_down_f32, cur); + // struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); + // cur = mul_mat(ctx0, ffn_down_f32, cur); + cur = mul_mat(ctx0, layer.ffn_down, cur); inpL = ggml_add(ctx0, cur, inpFF); } @@ -1875,8 +1879,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_step_graph(int32_t n_past, cur = ggml_mul(ctx0, gate, up); - struct ggml_tensor * step_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); - cur = mul_mat(ctx0, step_ffn_down_f32, cur); + // struct ggml_tensor * step_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32); + // cur = mul_mat(ctx0, step_ffn_down_f32, cur); + cur = mul_mat(ctx0, layer.ffn_down, cur); inpL = ggml_add(ctx0, cur, inpFF); }