diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 29a15086c..e0be9e4f3 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1111,6 +1111,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
     return gf;
 }
 
+bool enable_gpu_clip = true;
+void set_clip_uses_gpu(bool usegpu)
+{
+    enable_gpu_clip = usegpu;
+}
+
 // read and create ggml_context containing the tensors and their data
 struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
@@ -1225,6 +1231,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
+if(enable_gpu_clip)
+{
 #ifdef GGML_USE_CUDA
     new_clip->backend = ggml_backend_cuda_init(0);
     LOG_INF("%s: CLIP using CUDA backend\n", __func__);
 #endif
@@ -1249,6 +1257,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     new_clip->backend = ggml_backend_sycl_init(0);
     LOG_INF("%s: CLIP using SYCL backend\n", __func__);
 #endif
+}
 
     if (!new_clip->backend) {
         new_clip->backend = ggml_backend_cpu_init();
diff --git a/examples/llava/clip.h b/examples/llava/clip.h
index 1603edd26..d802ec410 100644
--- a/examples/llava/clip.h
+++ b/examples/llava/clip.h
@@ -93,6 +93,7 @@ CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
 
 CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
 
+CLIP_API void set_clip_uses_gpu(bool usegpu);
 #ifdef __cplusplus
 }
 #endif
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index ccb4e7100..63cc28814 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -2316,6 +2316,13 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if(mmproj_filename != "" && file_format==FileFormat::GGUF_GENERIC)
     {
         printf("\nAttempting to apply Multimodal Projector: %s\n", mmproj_filename.c_str());
+        #if defined(GGML_USE_VULKAN) || defined(GGML_USE_METAL)
+        if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL)
+        {
+            set_clip_uses_gpu(false);
+            printf("Clip will use CPU for this model!\n");
+        }
+        #endif
         clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
         if(clp_ctx == nullptr) {
             fprintf(stderr, "%s: error: failed to load mmproj model!\n", __func__);
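
Note that the flag only matters if it is set before the backend is selected, so callers must invoke set_clip_uses_gpu() ahead of clip_model_load(), exactly as gpttype_adapter.cpp does above. A minimal usage sketch follows; the load_mmproj_cpu_only wrapper is hypothetical, only set_clip_uses_gpu() and clip_model_load() come from this patch:

#include "clip.h"

// Hypothetical helper: force the mmproj/CLIP encoder onto the CPU backend.
// set_clip_uses_gpu(false) must run before clip_model_load(), because the
// flag is only read while clip_model_load() picks a ggml backend; with the
// GPU branches skipped, the loader falls through to ggml_backend_cpu_init().
static struct clip_ctx * load_mmproj_cpu_only(const char * mmproj_path) {
    set_clip_uses_gpu(false);
    return clip_model_load(mmproj_path, /*verbosity=*/ 1);
}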