Implement q5_0, q5_1 and q8_0

This commit is contained in:
0cc4m 2023-04-29 07:43:15 +02:00
parent 334637e43e
commit 9439da6f95
2 changed files with 115 additions and 14 deletions

View file

@ -51,7 +51,7 @@ __kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global f
const uint i = get_global_id(0) / 16;
const uint l = get_local_id(0);
const float d = vload_half(0, (__global half*) &blocks[i].d);;
const float d = vload_half(0, (__global half*) &blocks[i].d);
const uchar vi = blocks[i].qs[l];
@ -60,4 +60,73 @@ __kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global f
result[index + 1] = ((vi >> 4) - 8)*d;
}
struct block_q5_0
{
ushort d;
uint qh;
uchar qs[16];
};
__kernel void dequantize_row_q5_0(__global struct block_q5_0* blocks, __global float* result) {
const uint i = get_global_id(0) / 32;
const uint l = get_local_id(0);
const float d = vload_half(0, (__global half*) &blocks[i].d);
const uchar vi = blocks[i].qs[l];
const uint l2 = l * 2;
const uchar vh0 = ((blocks[i].qh & (1 << (l2 + 0))) >> (l2 + 0)) << 4;
const uchar vh1 = ((blocks[i].qh & (1 << (l2 + 1))) >> (l2 + 1)) << 4;
const uint index = i*32 + l2;
result[index + 0] = (((vi & 0xf) | vh0) - 16)*d;
result[index + 1] = (((vi >> 4) | vh1) - 16)*d;
}
struct block_q5_1
{
ushort d;
ushort m;
uint qh;
uchar qs[16];
};
__kernel void dequantize_row_q5_1(__global struct block_q5_1* blocks, __global float* result) {
const uint i = get_global_id(0) / 32;
const uint l = get_local_id(0);
const float d = vload_half(0, (__global half*) &blocks[i].d);
const float m = vload_half(0, (__global half*) &blocks[i].m);
const uchar vi = blocks[i].qs[l];
const uint l2 = l * 2;
const uchar vh0 = ((blocks[i].qh & (1 << (l2 + 0))) >> (l2 + 0)) << 4;
const uchar vh1 = ((blocks[i].qh & (1 << (l2 + 1))) >> (l2 + 1)) << 4;
const uint index = i*32 + l2;
result[index + 0] = ((vi & 0xf) | vh0)*d + m;
result[index + 1] = ((vi >> 4) | vh1)*d + m;
}
struct block_q8_0
{
float d;
uchar qs[32];
};
__kernel void dequantize_row_q8_0(__global struct block_q8_0* blocks, __global float* result) {
const uint i = get_global_id(0) / 32;
const uint l = get_local_id(0);
const float d = blocks[i].d;
const uint index = i*32 + l;
result[index] = blocks[i].qs[l] * d;
}
);