diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index aaf58edf4..cb495817a 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -392,7 +392,7 @@ struct clip_ctx { std::vector backend_buft; ggml_backend_t backend = nullptr; - + ggml_backend_t backend_cpu; ggml_backend_buffer_ptr buf; int max_nodes = 8192; @@ -404,29 +404,35 @@ struct clip_ctx { clip_ctx(clip_context_params & ctx_params) { + backend_cpu = ggml_backend_cpu_init(); //always has CPU backend + if (!backend_cpu) { + throw std::runtime_error("failed to initialize CPU backend"); + } + if(enable_gpu_clip) { #ifdef GGML_USE_CUDA backend = ggml_backend_cuda_init(0); - LOG_INF("%s: CLIP using CUDA backend\n", __func__); #endif #ifdef GGML_USE_METAL backend = ggml_backend_metal_init(); - LOG_INF("%s: CLIP using Metal backend\n", __func__); #endif #ifdef GGML_USE_VULKAN backend = ggml_backend_vk_init(0); - LOG_INF("%s: CLIP using Vulkan backend\n", __func__); #endif } - if (!backend) { - backend = ggml_backend_cpu_init(); + if (backend) { + LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend)); + backend_ptrs.push_back(backend); + backend_buft.push_back(ggml_backend_get_default_buffer_type(backend)); + } else { + backend = backend_cpu; LOG_INF("%s: CLIP using CPU backend\n", __func__); } - backend_ptrs.push_back(backend); - backend_buft.push_back(ggml_backend_get_default_buffer_type(backend)); + backend_ptrs.push_back(backend_cpu); + backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu)); sched.reset( ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false, true) @@ -435,6 +441,9 @@ struct clip_ctx { ~clip_ctx() { ggml_backend_free(backend); + if (backend != backend_cpu) { + ggml_backend_free(backend_cpu); + } } // this function is added so that we don't change too much of the existing code