From 6e42e0ebb644b50d6368b9cbc8fadec6a6e8eb32 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 14 May 2026 18:56:57 +0800
Subject: [PATCH] revert to use upstream fix

---
 ggml/src/ggml-cuda/allreduce.cu | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/ggml/src/ggml-cuda/allreduce.cu b/ggml/src/ggml-cuda/allreduce.cu
index d887dc231..434689abd 100644
--- a/ggml/src/ggml-cuda/allreduce.cu
+++ b/ggml/src/ggml-cuda/allreduce.cu
@@ -184,21 +184,13 @@ static __global__ void ggml_cuda_ar_kernel(
             #pragma unroll
             for (int k = 0; k < ELEMS_PER_VEC; ++k) {
                 const T_wire d_low = ggml_cuda_cast<T_wire>(sendbuf[off + k]);
-
-                const float a = ggml_cuda_cast<float>(d_low);
-                const float b = ggml_cuda_cast<float>(wire[k]);
-
-                recvbuf[off + k] = ggml_cuda_cast<T_dst>(a + b);
+                recvbuf[off + k] = ggml_cuda_cast<T_dst>(d_low) + ggml_cuda_cast<T_dst>(wire[k]);
             }
         }
-
         if (bid == 0 && tid < count - tail) {
             const T_wire d_low = ggml_cuda_cast<T_wire>(sendbuf[tail + tid]);
-
-            const float a = ggml_cuda_cast<float>(d_low);
-            const float b = ggml_cuda_cast<float>(host_other[tail + tid]);
-
-            recvbuf[tail + tid] = ggml_cuda_cast<T_dst>(a + b);
+            recvbuf[tail + tid] =
+                ggml_cuda_cast<T_dst>(d_low) + ggml_cuda_cast<T_dst>(host_other[tail + tid]);
         }
     }
 }
@@ -218,9 +210,7 @@ static __global__ void ggml_cuda_ar_add_kernel(
     const int nt  = gridDim.x * blockDim.x;
     for (int i = tid; i < count; i += nt) {
         const T_src d_low = ggml_cuda_cast<T_src>(dst[i]);
-        const float a = (float) ggml_cuda_cast<float>(d_low);
-        const float b = (float) ggml_cuda_cast<float>(src[i]);
-        dst[i] = ggml_cuda_cast<T_dst>(a + b);
+        dst[i] = ggml_cuda_cast<T_dst>(d_low) + ggml_cuda_cast<T_dst>(src[i]);
     }
 }