Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-10 17:14:36 +00:00
temporarily make qwen2vl use clip on cpu for vulkan and macos

commit b7d3274523 (parent fc52a38a25)
3 changed files with 17 additions and 0 deletions
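In brief: the commit adds a global enable_gpu_clip flag and a set_clip_uses_gpu() setter to clip.cpp, wraps the GPU backend initialization in clip_model_load() behind that flag, exports the setter from clip.h, and has the koboldcpp loader clear the flag for Qwen2-VL on Vulkan and Metal builds so the CLIP projector falls back to the CPU backend. A minimal sketch of the resulting call pattern, assuming a caller loading a multimodal projector (the file name is a placeholder, not from the commit):

    // Sketch only; "mmproj.gguf" is a hypothetical path.
    set_clip_uses_gpu(false);  // make clip_model_load skip GPU backend init
    clip_ctx * ctx = clip_model_load("mmproj.gguf", /*verbosity=*/ 1);
    // with the flag cleared, ctx->backend is created via ggml_backend_cpu_init()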
examples/llava/clip.cpp
@@ -1111,6 +1111,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
     return gf;
 }
 
+bool enable_gpu_clip = true;
+void set_clip_uses_gpu(bool usegpu)
+{
+    enable_gpu_clip = usegpu;
+}
+
 // read and create ggml_context containing the tensors and their data
 struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
@@ -1225,6 +1231,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
 
+    if(enable_gpu_clip)
+    {
 #ifdef GGML_USE_CUDA
     new_clip->backend = ggml_backend_cuda_init(0);
     LOG_INF("%s: CLIP using CUDA backend\n", __func__);
@@ -1249,6 +1257,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     new_clip->backend = ggml_backend_sycl_init(0);
     LOG_INF("%s: CLIP using SYCL backend\n", __func__);
 #endif
+    }
 
     if (!new_clip->backend) {
         new_clip->backend = ggml_backend_cpu_init();
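Taken together, the clip.cpp hunks gate the GPU backend initialization behind the new flag: when the flag is false the backend pointer stays null and the pre-existing CPU fallback takes over. A condensed sketch of the resulting control flow in clip_model_load (not verbatim; only the branches visible in this diff's context lines are shown):

    if (enable_gpu_clip)
    {
    #ifdef GGML_USE_CUDA
        new_clip->backend = ggml_backend_cuda_init(0);  // GPU path, unchanged by this commit
    #endif
        // ... further backend #ifdef blocks (SYCL, etc.) sit inside the same guard ...
    }
    if (!new_clip->backend) {
        new_clip->backend = ggml_backend_cpu_init();  // reached when the flag is false or no GPU backend was built
    }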
examples/llava/clip.h
@@ -93,6 +93,7 @@ CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
 
 CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
 
+CLIP_API void set_clip_uses_gpu(bool usegpu);
 #ifdef __cplusplus
 }
 #endif
gpttype_adapter.cpp
@@ -2316,6 +2316,13 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if(mmproj_filename != "" && file_format==FileFormat::GGUF_GENERIC)
     {
         printf("\nAttempting to apply Multimodal Projector: %s\n", mmproj_filename.c_str());
+        #if defined(GGML_USE_VULKAN) || defined(GGML_USE_METAL)
+        if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL)
+        {
+            set_clip_uses_gpu(false);
+            printf("Clip will use CPU for this model!\n");
+        }
+        #endif
         clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
         if(clp_ctx == nullptr) {
             fprintf(stderr, "%s: error: failed to load mmproj model!\n", __func__);
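Note that the gpttype_adapter.cpp guard combines a compile-time check (#if defined(GGML_USE_VULKAN) || defined(GGML_USE_METAL)) with a runtime architecture check (ARCH_QWEN2VL), so CUDA and CPU-only builds, and all other model architectures, keep the GPU CLIP path; per the commit title, the CPU fallback is intended as a temporary workaround.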