mmq debug log

This commit is contained in:
Concedo 2025-05-09 18:30:11 +08:00
parent 46849e80fb
commit ea2e5ed1e9

View file

@@ -2168,8 +2168,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
#if defined(GGML_USE_CUDA) #if defined(GGML_USE_CUDA)
if(cu_parseinfo_maindevice>0) if(cu_parseinfo_maindevice>0)
{ {
printf("CUBLAS: Set main device to %d\n",cu_parseinfo_maindevice); printf("CUDA: Set main device to %d\n",cu_parseinfo_maindevice);
} }
printf("CUDA MMQ: %s\n",(inputs.use_mmq?"True":"False"));
ggml_cuda_set_mul_mat_q(inputs.use_mmq); ggml_cuda_set_mul_mat_q(inputs.use_mmq);
#endif #endif
if((file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 || file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL) && !kcpp_data->flash_attn) if((file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 || file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL) && !kcpp_data->flash_attn)