diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 490ae73e4..0a33355a3 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -208,7 +208,7 @@ static ggml_cuda_device_info ggml_cuda_init() { //#else // GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__); //#endif // GGML_CUDA_FORCE_CUBLAS - GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\n---\n"); + GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count); std::vector> turing_devices_without_mma; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index d6b1e5644..b2357226c 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -2217,6 +2217,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in printf("CUDA: Set main device to %d\n",kcpp_parseinfo_maindevice); } printf("CUDA MMQ: %s\n",(inputs.use_mmq?"True":"False")); + printf("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\n---\n"); ggml_cuda_set_mul_mat_q(inputs.use_mmq); #endif if((file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 || file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL) && !kcpp_data->flash_attn)