mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-14 10:59:41 +00:00
CUDA: fix half2 -> half conversion for HIP (#15529)
This commit is contained in:
parent
611f419cff
commit
710dfc465a
1 changed file with 1 addition and 1 deletion
|
@@ -258,7 +258,7 @@ static __global__ void flash_attn_tile_ext_f16(
|
||||||
const half val = hexp(sink - kqmax[j0/nwarps]);
|
const half val = hexp(sink - kqmax[j0/nwarps]);
|
||||||
kqsum[j0/nwarps] = kqsum[j0/nwarps] * KQ_max_scale;
|
kqsum[j0/nwarps] = kqsum[j0/nwarps] * KQ_max_scale;
|
||||||
if (threadIdx.x == 0) {
|
if (threadIdx.x == 0) {
|
||||||
kqsum[j0/nwarps].x = __hadd(kqsum[j0/nwarps].x, val);
|
kqsum[j0/nwarps].x = __hadd(__low2half(kqsum[j0/nwarps]), val);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue