mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
vulkan: support q4_0/q8_0 KV in scalar FA
This commit is contained in:
parent
989bfb18fc
commit
e66094276b
3 changed files with 66 additions and 8 deletions
|
@@ -1939,6 +1939,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|||
CREATE_FA2(TYPE, NAMELC, SCALAR, SUFFIX, 256)
|
||||
|
||||
CREATE_FA(GGML_TYPE_F16, f16, true, )
|
||||
CREATE_FA(GGML_TYPE_Q4_0, q4_0, true, )
|
||||
CREATE_FA(GGML_TYPE_Q8_0, q8_0, true, )
|
||||
#if defined(VK_NV_cooperative_matrix2) && defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
|
||||
if (device->coopmat2) {
|
||||
CREATE_FA(GGML_TYPE_F16, f16, false, _cm2)
|
||||
|
@@ -9603,10 +9605,12 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
|||
switch (op->src[1]->type) {
|
||||
case GGML_TYPE_F16:
|
||||
case GGML_TYPE_Q4_0:
|
||||
case GGML_TYPE_Q8_0:
|
||||
// supported in scalar and coopmat2 paths
|
||||
break;
|
||||
case GGML_TYPE_Q4_1:
|
||||
case GGML_TYPE_Q5_0:
|
||||
case GGML_TYPE_Q5_1:
|
||||
case GGML_TYPE_Q8_0:
|
||||
// K dequants currently disabled because D dimension is rounded up to 256 and runs inefficiently
|
||||
//case GGML_TYPE_Q2_K:
|
||||
//case GGML_TYPE_Q3_K:
|
||||
|
@@ -9622,13 +9626,14 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
|||
//case GGML_TYPE_IQ3_S:
|
||||
//case GGML_TYPE_IQ4_XS:
|
||||
case GGML_TYPE_IQ4_NL:
|
||||
// currently supported only in coopmat2 path
|
||||
if (!coopmat2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
if (!coopmat2 && op->src[1]->type != GGML_TYPE_F16) {
|
||||
return false;
|
||||
}
|
||||
if (!coopmat2 && !device->subgroup_shuffle) {
|
||||
// scalar FA uses subgroupShuffle
|
||||
return false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue