Implement q5_0, q5_1 and q8_0

2025-09-12 18:09:42 +00:00 · 2023-04-29 07:43:15 +02:00 · 2023-04-29 07:43:15 +02:00 · 9439da6f95
commit 9439da6f95
parent 334637e43e
2 changed files with 115 additions and 14 deletions
--- a/ggml-opencl-dequant.cl
+++ b/ggml-opencl-dequant.cl
@ -51,7 +51,7 @@ __kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global f
    const uint i = get_global_id(0) / 16;
    const uint l = get_local_id(0);

-    const float d = vload_half(0, (__global half*) &blocks[i].d);;
+    const float d = vload_half(0, (__global half*) &blocks[i].d);

    const uchar vi = blocks[i].qs[l];

@ -60,4 +60,73 @@ __kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global f
    result[index + 1] = ((vi >> 4) - 8)*d;
 }

+
+struct block_q5_0
+{
+    ushort d;
+    uint qh;
+    uchar qs[16];
+};
+
+__kernel void dequantize_row_q5_0(__global struct block_q5_0* blocks, __global float* result) {
+    const uint i = get_global_id(0) / 32;
+    const uint l = get_local_id(0);
+
+    const float d = vload_half(0, (__global half*) &blocks[i].d);
+
+    const uchar vi = blocks[i].qs[l];
+
+    const uint l2 = l * 2;
+
+    const uchar vh0 = ((blocks[i].qh & (1 << (l2 + 0))) >> (l2 + 0)) << 4;
+    const uchar vh1 = ((blocks[i].qh & (1 << (l2 + 1))) >> (l2 + 1)) << 4;
+
+    const uint index = i*32 + l2;
+    result[index + 0] = (((vi & 0xf) | vh0) - 16)*d;
+    result[index + 1] = (((vi >>  4) | vh1) - 16)*d;
+}
+
+struct block_q5_1
+{
+    ushort d;
+    ushort m;
+    uint qh;
+    uchar qs[16];
+};
+
+__kernel void dequantize_row_q5_1(__global struct block_q5_1* blocks, __global float* result) {
+    const uint i = get_global_id(0) / 32;
+    const uint l = get_local_id(0);
+
+    const float d = vload_half(0, (__global half*) &blocks[i].d);
+    const float m = vload_half(0, (__global half*) &blocks[i].m);
+
+    const uchar vi = blocks[i].qs[l];
+
+    const uint l2 = l * 2;
+
+    const uchar vh0 = ((blocks[i].qh & (1 << (l2 + 0))) >> (l2 + 0)) << 4;
+    const uchar vh1 = ((blocks[i].qh & (1 << (l2 + 1))) >> (l2 + 1)) << 4;
+
+    const uint index = i*32 + l2;
+    result[index + 0] = ((vi & 0xf) | vh0)*d + m;
+    result[index + 1] = ((vi >>  4) | vh1)*d + m;
+}
+
+struct block_q8_0
+{
+    float d;
+    uchar qs[32];
+};
+
+__kernel void dequantize_row_q8_0(__global struct block_q8_0* blocks, __global float* result) {
+    const uint i = get_global_id(0) / 32;
+    const uint l = get_local_id(0);
+
+    const float d = blocks[i].d;
+
+    const uint index = i*32 + l;
+    result[index] = blocks[i].qs[l] * d;
+}
+
 );