From eca9f4c1dfc421d5ae924739eea71448b5e76fe5 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 30 Apr 2026 17:28:11 +0800
Subject: [PATCH] use original precision for q3tts

---
 otherarch/qwen3tts/tts_transformer.cpp | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/otherarch/qwen3tts/tts_transformer.cpp b/otherarch/qwen3tts/tts_transformer.cpp
index 686322807..6cba48393 100644
--- a/otherarch/qwen3tts/tts_transformer.cpp
+++ b/otherarch/qwen3tts/tts_transformer.cpp
@@ -1265,8 +1265,9 @@ struct ggml_cgraph * TTSTransformer::build_prefill_forward_graph(int32_t n_token
 
         cur = ggml_mul(ctx0, gate, up);
 
-        struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
-        cur = mul_mat(ctx0, ffn_down_f32, cur);
+        // Pass ffn_down to mul_mat as stored; it was previously converted with
+        // ggml_cast(..., GGML_TYPE_F32) before the multiplication.
+        cur = mul_mat(ctx0, layer.ffn_down, cur);
 
         inpL = ggml_add(ctx0, cur, inpFF);
     }
@@ -1410,8 +1411,9 @@ struct ggml_cgraph * TTSTransformer::build_step_graph(int32_t n_past) {
 
         cur = ggml_mul(ctx0, gate, up);
 
-        struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
-        cur = mul_mat(ctx0, ffn_down_f32, cur);
+        // Pass ffn_down to mul_mat as stored; it was previously converted with
+        // ggml_cast(..., GGML_TYPE_F32) before the multiplication.
+        cur = mul_mat(ctx0, layer.ffn_down, cur);
 
         inpL = ggml_add(ctx0, cur, inpFF);
     }
@@ -1546,8 +1548,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_graph(int32_t n_prev_codes)
 
         cur = ggml_mul(ctx0, gate, up);
 
-        struct ggml_tensor * old_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
-        cur = mul_mat(ctx0, old_ffn_down_f32, cur);
+        // Pass ffn_down to mul_mat as stored; it was previously converted with
+        // ggml_cast(..., GGML_TYPE_F32) before the multiplication.
+        cur = mul_mat(ctx0, layer.ffn_down, cur);
 
         inpL = ggml_add(ctx0, cur, inpFF);
     }
@@ -1703,8 +1706,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_prefill_graph() {
 
         cur = ggml_mul(ctx0, gate, up);
 
-        struct ggml_tensor * ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
-        cur = mul_mat(ctx0, ffn_down_f32, cur);
+        // Pass ffn_down to mul_mat as stored; it was previously converted with
+        // ggml_cast(..., GGML_TYPE_F32) before the multiplication.
+        cur = mul_mat(ctx0, layer.ffn_down, cur);
 
         inpL = ggml_add(ctx0, cur, inpFF);
     }
@@ -1875,8 +1879,9 @@ struct ggml_cgraph * TTSTransformer::build_code_pred_step_graph(int32_t n_past,
 
         cur = ggml_mul(ctx0, gate, up);
 
-        struct ggml_tensor * step_ffn_down_f32 = ggml_cast(ctx0, layer.ffn_down, GGML_TYPE_F32);
-        cur = mul_mat(ctx0, step_ffn_down_f32, cur);
+        // Pass ffn_down to mul_mat as stored; it was previously converted with
+        // ggml_cast(..., GGML_TYPE_F32) before the multiplication.
+        cur = mul_mat(ctx0, layer.ffn_down, cur);
 
         inpL = ggml_add(ctx0, cur, inpFF);
     }