mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-10 04:00:53 +00:00
vulkan: enable mmvq for q2_k on NVIDIA (#17675)
This commit is contained in:
parent
93bb92664e
commit
6ab0d64960
1 changed file with 4 additions and 0 deletions
|
|
@@ -6948,6 +6948,10 @@ static bool ggml_vk_should_use_mmvq(const vk_device& device, uint32_t m, uint32_
|
|||
// Quantization overhead is not worth it for small k
|
||||
switch (device->vendor_id) {
|
||||
case VK_VENDOR_ID_NVIDIA:
|
||||
if (src0_type == GGML_TYPE_Q2_K) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (k <= 4096) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue